├── .gitignore ├── Datasets ├── File_List │ ├── TSB-AD-M-Eva.csv │ ├── TSB-AD-M-Tuning.csv │ ├── TSB-AD-M.csv │ ├── TSB-AD-U-Eva-Full.csv │ ├── TSB-AD-U-Eva.csv │ ├── TSB-AD-U-Tuning.csv │ └── TSB-AD-U.csv ├── README.md ├── TSB-AD-M │ └── 057_SMD_id_1_Facility_tr_4529_1st_4629.csv └── TSB-AD-U │ └── 001_NAB_id_1_Facility_tr_1007_1st_2014.csv ├── LICENSE ├── README.md ├── TSB_AD ├── HP_list.py ├── __init__.py ├── evaluation │ ├── __init__.py │ ├── affiliation │ │ ├── __init__.py │ │ ├── _affiliation_zone.py │ │ ├── _integral_interval.py │ │ ├── _single_ground_truth_event.py │ │ ├── generics.py │ │ └── metrics.py │ ├── basic_metrics.py │ ├── metrics.py │ └── visualize.py ├── main.py ├── model_wrapper.py ├── models │ ├── AE.py │ ├── AnomalyTransformer.py │ ├── CBLOF.py │ ├── CNN.py │ ├── COF.py │ ├── COPOD.py │ ├── Chronos.py │ ├── Donut.py │ ├── EIF.py │ ├── FFT.py │ ├── FITS.py │ ├── HBOS.py │ ├── IForest.py │ ├── KMeansAD.py │ ├── KNN.py │ ├── LOF.py │ ├── LSTMAD.py │ ├── Lag_Llama.py │ ├── Left_STAMPi.py │ ├── M2N2.py │ ├── MCD.py │ ├── MOMENT.py │ ├── MatrixProfile.py │ ├── NormA.txt │ ├── OCSVM.py │ ├── OFA.py │ ├── OmniAnomaly.py │ ├── PCA.py │ ├── POLY.py │ ├── README.md │ ├── RobustPCA.py │ ├── SAND.py │ ├── SR.py │ ├── Series2Graph.txt │ ├── TimesFM.py │ ├── TimesNet.py │ ├── TranAD.py │ ├── USAD.py │ ├── __init__.py │ ├── base.py │ ├── distance.py │ └── feature.py └── utils │ ├── __init__.py │ ├── dataset.py │ ├── slidingWindows.py │ ├── stat_models.py │ ├── torch_utility.py │ └── utility.py ├── assets └── fig │ ├── readme_title.png │ ├── readme_title_2.png │ └── tsb_overview.png ├── benchmark_exp ├── HP_Tuning_M.py ├── HP_Tuning_U.py ├── README.md ├── Run_Custom_Detector.py ├── Run_Detector_M.py ├── Run_Detector_U.py ├── analysis.ipynb ├── benchmark_eval_results │ ├── README.md │ ├── multi_mergedTable_VUS-PR.csv │ └── uni_mergedTable_VUS-PR.csv └── visualize_ts.ipynb ├── docs ├── index.html └── static │ ├── .DS_Store │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── .DS_Store │ ├── elephant.svg │ └── tsb_overview.png │ ├── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js │ ├── leaderboard │ ├── TSB-AD-M.html │ └── TSB-AD-U.html │ └── pdfs │ └── TSB-AD-NeurIPS24.pdf ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | build 4 | run_scripts 5 | Datasets/TSB-AD-Datasets 6 | TSB_AD/models/NormA.py 7 | TSB_AD/models/Series2Graph.py -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-M-Eva.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 001_Genesis_id_1_Sensor_tr_4055_1st_15538.csv 3 | 002_MSL_id_1_Sensor_tr_500_1st_900.csv 4 | 003_MSL_id_2_Sensor_tr_883_1st_1238.csv 5 | 005_MSL_id_4_Sensor_tr_855_1st_2700.csv 6 | 006_MSL_id_5_Sensor_tr_1150_1st_1250.csv 7 | 007_MSL_id_6_Sensor_tr_980_1st_3550.csv 8 | 008_MSL_id_7_Sensor_tr_656_1st_1630.csv 9 | 009_MSL_id_8_Sensor_tr_714_1st_1390.csv 10 | 010_MSL_id_9_Sensor_tr_554_1st_1172.csv 11 | 012_MSL_id_11_Sensor_tr_539_1st_940.csv 12 | 013_MSL_id_12_Sensor_tr_554_1st_1200.csv 13 | 014_MSL_id_13_Sensor_tr_1525_1st_4575.csv 14 | 015_MSL_id_14_Sensor_tr_575_1st_1250.csv 15 | 
016_MSL_id_15_Sensor_tr_500_1st_780.csv 16 | 017_MSL_id_16_Sensor_tr_512_1st_1850.csv 17 | 018_Daphnet_id_1_HumanActivity_tr_9693_1st_20732.csv 18 | 019_MITDB_id_1_Medical_tr_37500_1st_103211.csv 19 | 020_MITDB_id_2_Medical_tr_50000_1st_52315.csv 20 | 021_MITDB_id_3_Medical_tr_50000_1st_57595.csv 21 | 022_MITDB_id_4_Medical_tr_50000_1st_54253.csv 22 | 024_MITDB_id_6_Medical_tr_50000_1st_58118.csv 23 | 025_MITDB_id_7_Medical_tr_37500_1st_88864.csv 24 | 026_MITDB_id_8_Medical_tr_30495_1st_30595.csv 25 | 027_MITDB_id_9_Medical_tr_25000_1st_52255.csv 26 | 029_MITDB_id_11_Medical_tr_50000_1st_518037.csv 27 | 030_MITDB_id_12_Medical_tr_50000_1st_84572.csv 28 | 031_MITDB_id_13_Medical_tr_50000_1st_79433.csv 29 | 032_GHL_id_1_Sensor_tr_50000_1st_65001.csv 30 | 033_GHL_id_2_Sensor_tr_50000_1st_51001.csv 31 | 034_GHL_id_3_Sensor_tr_50000_1st_122001.csv 32 | 035_GHL_id_4_Sensor_tr_50000_1st_90001.csv 33 | 036_GHL_id_5_Sensor_tr_50000_1st_67147.csv 34 | 037_GHL_id_6_Sensor_tr_50000_1st_80001.csv 35 | 038_GHL_id_7_Sensor_tr_50000_1st_100001.csv 36 | 039_GHL_id_8_Sensor_tr_50000_1st_63030.csv 37 | 041_GHL_id_10_Sensor_tr_50000_1st_57001.csv 38 | 042_GHL_id_11_Sensor_tr_50000_1st_150001.csv 39 | 043_GHL_id_12_Sensor_tr_39938_1st_40038.csv 40 | 044_GHL_id_13_Sensor_tr_50000_1st_145001.csv 41 | 045_GHL_id_14_Sensor_tr_50000_1st_85076.csv 42 | 046_GHL_id_15_Sensor_tr_50000_1st_156462.csv 43 | 047_GHL_id_16_Sensor_tr_50000_1st_77001.csv 44 | 048_GHL_id_17_Sensor_tr_50000_1st_154001.csv 45 | 050_GHL_id_19_Sensor_tr_43750_1st_55001.csv 46 | 051_GHL_id_20_Sensor_tr_50000_1st_75110.csv 47 | 052_GHL_id_21_Sensor_tr_50000_1st_98001.csv 48 | 053_GHL_id_22_Sensor_tr_50000_1st_126448.csv 49 | 054_GHL_id_23_Sensor_tr_50000_1st_135001.csv 50 | 055_GHL_id_24_Sensor_tr_50000_1st_118124.csv 51 | 056_GHL_id_25_Sensor_tr_50000_1st_105568.csv 52 | 057_SMD_id_1_Facility_tr_4529_1st_4629.csv 53 | 058_SMD_id_2_Facility_tr_1087_1st_1187.csv 54 | 059_SMD_id_3_Facility_tr_757_1st_857.csv 55 | 060_SMD_id_4_Facility_tr_7176_1st_10609.csv 56 | 061_SMD_id_5_Facility_tr_7176_1st_15144.csv 57 | 063_SMD_id_7_Facility_tr_5923_1st_6506.csv 58 | 064_SMD_id_8_Facility_tr_2272_1st_2372.csv 59 | 065_SMD_id_9_Facility_tr_737_1st_837.csv 60 | 066_SMD_id_10_Facility_tr_2634_1st_2734.csv 61 | 067_SMD_id_11_Facility_tr_1980_1st_2080.csv 62 | 068_SMD_id_12_Facility_tr_1099_1st_1199.csv 63 | 069_SMD_id_13_Facility_tr_5925_1st_12760.csv 64 | 070_SMD_id_14_Facility_tr_1070_1st_1170.csv 65 | 071_SMD_id_15_Facility_tr_1109_1st_1209.csv 66 | 073_SMD_id_17_Facility_tr_5926_1st_10620.csv 67 | 074_SMD_id_18_Facility_tr_7174_1st_21230.csv 68 | 075_SMD_id_19_Facility_tr_564_1st_664.csv 69 | 076_SMD_id_20_Facility_tr_5925_1st_17580.csv 70 | 077_SMD_id_21_Facility_tr_3026_1st_3126.csv 71 | 078_SMD_id_22_Facility_tr_500_1st_326.csv 72 | 079_LTDB_id_1_Medical_tr_3618_1st_3718.csv 73 | 080_LTDB_id_2_Medical_tr_500_1st_266.csv 74 | 081_LTDB_id_3_Medical_tr_500_1st_26.csv 75 | 083_LTDB_id_5_Medical_tr_15502_1st_15602.csv 76 | 084_SVDB_id_1_Medical_tr_12973_1st_13073.csv 77 | 085_SVDB_id_2_Medical_tr_50000_1st_54982.csv 78 | 086_SVDB_id_3_Medical_tr_533_1st_633.csv 79 | 087_SVDB_id_4_Medical_tr_5421_1st_5521.csv 80 | 088_SVDB_id_5_Medical_tr_500_1st_168.csv 81 | 089_SVDB_id_6_Medical_tr_50000_1st_83083.csv 82 | 091_SVDB_id_8_Medical_tr_500_1st_126.csv 83 | 092_SVDB_id_9_Medical_tr_2674_1st_2774.csv 84 | 093_SVDB_id_10_Medical_tr_500_1st_37.csv 85 | 094_SVDB_id_11_Medical_tr_20100_1st_23611.csv 86 | 095_SVDB_id_12_Medical_tr_6264_1st_6364.csv 87 | 
096_SVDB_id_13_Medical_tr_500_1st_444.csv 88 | 097_SVDB_id_14_Medical_tr_1031_1st_1131.csv 89 | 098_SVDB_id_15_Medical_tr_500_1st_303.csv 90 | 099_SVDB_id_16_Medical_tr_1115_1st_1215.csv 91 | 100_SVDB_id_17_Medical_tr_17803_1st_17903.csv 92 | 101_SVDB_id_18_Medical_tr_1851_1st_1951.csv 93 | 102_SVDB_id_19_Medical_tr_50000_1st_107354.csv 94 | 103_SVDB_id_20_Medical_tr_12167_1st_12267.csv 95 | 104_SVDB_id_21_Medical_tr_50000_1st_106634.csv 96 | 105_SVDB_id_22_Medical_tr_500_1st_417.csv 97 | 106_SVDB_id_23_Medical_tr_37500_1st_121963.csv 98 | 108_SVDB_id_25_Medical_tr_1238_1st_1338.csv 99 | 109_SVDB_id_26_Medical_tr_1038_1st_1138.csv 100 | 110_SVDB_id_27_Medical_tr_30824_1st_30924.csv 101 | 111_SVDB_id_28_Medical_tr_1791_1st_1891.csv 102 | 112_SVDB_id_29_Medical_tr_8226_1st_8326.csv 103 | 114_SVDB_id_31_Medical_tr_25000_1st_63090.csv 104 | 115_PSM_id_1_Facility_tr_50000_1st_129872.csv 105 | 116_TAO_id_1_Environment_tr_500_1st_3.csv 106 | 117_TAO_id_2_Environment_tr_500_1st_1.csv 107 | 118_TAO_id_3_Environment_tr_500_1st_7.csv 108 | 119_TAO_id_4_Environment_tr_500_1st_1.csv 109 | 121_TAO_id_6_Environment_tr_500_1st_7.csv 110 | 122_TAO_id_7_Environment_tr_500_1st_19.csv 111 | 123_TAO_id_8_Environment_tr_500_1st_62.csv 112 | 124_TAO_id_9_Environment_tr_500_1st_1.csv 113 | 125_TAO_id_10_Environment_tr_500_1st_9.csv 114 | 127_TAO_id_12_Environment_tr_500_1st_24.csv 115 | 128_TAO_id_13_Environment_tr_500_1st_7.csv 116 | 129_OPPORTUNITY_id_1_HumanActivity_tr_1801_1st_1901.csv 117 | 130_OPPORTUNITY_id_2_HumanActivity_tr_1045_1st_1145.csv 118 | 132_OPPORTUNITY_id_4_HumanActivity_tr_895_1st_995.csv 119 | 133_OPPORTUNITY_id_5_HumanActivity_tr_1745_1st_6500.csv 120 | 134_OPPORTUNITY_id_6_HumanActivity_tr_1477_1st_1577.csv 121 | 135_OPPORTUNITY_id_7_HumanActivity_tr_2085_1st_2185.csv 122 | 136_OPPORTUNITY_id_8_HumanActivity_tr_1495_1st_1595.csv 123 | 137_CreditCard_id_1_Finance_tr_500_1st_541.csv 124 | 138_CATSv2_id_1_Sensor_tr_16568_1st_16668.csv 125 | 139_CATSv2_id_2_Sensor_tr_5592_1st_5692.csv 126 | 141_CATSv2_id_4_Sensor_tr_41727_1st_41827.csv 127 | 142_CATSv2_id_5_Sensor_tr_30704_1st_30804.csv 128 | 143_CATSv2_id_6_Sensor_tr_50000_1st_60232.csv 129 | 144_SMAP_id_1_Sensor_tr_2052_1st_5300.csv 130 | 145_SMAP_id_2_Sensor_tr_2133_1st_5400.csv 131 | 146_SMAP_id_3_Sensor_tr_2128_1st_5000.csv 132 | 147_SMAP_id_4_Sensor_tr_2160_1st_6449.csv 133 | 148_SMAP_id_5_Sensor_tr_2011_1st_5060.csv 134 | 150_SMAP_id_7_Sensor_tr_2077_1st_5394.csv 135 | 151_SMAP_id_8_Sensor_tr_1971_1st_4870.csv 136 | 152_SMAP_id_9_Sensor_tr_2073_1st_5600.csv 137 | 153_SMAP_id_10_Sensor_tr_1840_1st_4030.csv 138 | 154_SMAP_id_11_Sensor_tr_2117_1st_4770.csv 139 | 155_SMAP_id_12_Sensor_tr_1907_1st_4800.csv 140 | 156_SMAP_id_13_Sensor_tr_1173_1st_2750.csv 141 | 157_SMAP_id_14_Sensor_tr_2126_1st_5000.csv 142 | 158_SMAP_id_15_Sensor_tr_2075_1st_5610.csv 143 | 159_SMAP_id_16_Sensor_tr_1757_1st_2650.csv 144 | 160_SMAP_id_17_Sensor_tr_1832_1st_5300.csv 145 | 161_SMAP_id_18_Sensor_tr_2075_1st_5550.csv 146 | 162_SMAP_id_19_Sensor_tr_1908_1st_4690.csv 147 | 163_SMAP_id_20_Sensor_tr_2051_1st_4575.csv 148 | 165_SMAP_id_22_Sensor_tr_2129_1st_5000.csv 149 | 166_SMAP_id_23_Sensor_tr_1113_1st_1890.csv 150 | 167_SMAP_id_24_Sensor_tr_2094_1st_5600.csv 151 | 168_SMAP_id_25_Sensor_tr_1998_1st_2098.csv 152 | 169_SMAP_id_26_Sensor_tr_1811_1st_4510.csv 153 | 170_SMAP_id_27_Sensor_tr_2160_1st_4690.csv 154 | 171_SWaT_id_1_Sensor_tr_3749_1st_9522.csv 155 | 172_SWaT_id_2_Sensor_tr_23700_1st_23800.csv 156 | 173_GECCO_id_1_Sensor_tr_16165_1st_16265.csv 157 | 
174_Exathlon_id_1_Facility_tr_10766_1st_12590.csv 158 | 175_Exathlon_id_2_Facility_tr_10684_1st_10784.csv 159 | 176_Exathlon_id_3_Facility_tr_10766_1st_12590.csv 160 | 177_Exathlon_id_4_Facility_tr_11665_1st_13484.csv 161 | 179_Exathlon_id_6_Facility_tr_11665_1st_13484.csv 162 | 180_Exathlon_id_7_Facility_tr_10766_1st_12590.csv 163 | 181_Exathlon_id_8_Facility_tr_11663_1st_13482.csv 164 | 182_Exathlon_id_9_Facility_tr_10766_1st_12590.csv 165 | 183_Exathlon_id_10_Facility_tr_11665_1st_13484.csv 166 | 184_Exathlon_id_11_Facility_tr_11665_1st_13484.csv 167 | 185_Exathlon_id_12_Facility_tr_11665_1st_13484.csv 168 | 186_Exathlon_id_13_Facility_tr_10766_1st_12590.csv 169 | 187_Exathlon_id_14_Facility_tr_6193_1st_6293.csv 170 | 188_Exathlon_id_15_Facility_tr_12538_1st_12638.csv 171 | 189_Exathlon_id_16_Facility_tr_11663_1st_13482.csv 172 | 190_Exathlon_id_17_Facility_tr_12538_1st_12638.csv 173 | 191_Exathlon_id_18_Facility_tr_11665_1st_13484.csv 174 | 192_Exathlon_id_19_Facility_tr_11665_1st_13484.csv 175 | 193_Exathlon_id_20_Facility_tr_8898_1st_8998.csv 176 | 194_Exathlon_id_21_Facility_tr_6985_1st_7085.csv 177 | 196_Exathlon_id_23_Facility_tr_11665_1st_13484.csv 178 | 197_Exathlon_id_24_Facility_tr_10766_1st_12590.csv 179 | 198_Exathlon_id_25_Facility_tr_12538_1st_12638.csv 180 | 199_Exathlon_id_26_Facility_tr_12538_1st_12638.csv 181 | 200_Exathlon_id_27_Facility_tr_10766_1st_12590.csv 182 | -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-M-Tuning.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 004_MSL_id_3_Sensor_tr_530_1st_630.csv 3 | 011_MSL_id_10_Sensor_tr_1525_1st_4590.csv 4 | 023_MITDB_id_5_Medical_tr_25000_1st_36913.csv 5 | 028_MITDB_id_10_Medical_tr_37500_1st_39948.csv 6 | 040_GHL_id_9_Sensor_tr_50000_1st_92001.csv 7 | 049_GHL_id_18_Sensor_tr_50000_1st_109001.csv 8 | 062_SMD_id_6_Facility_tr_7180_1st_15131.csv 9 | 072_SMD_id_16_Facility_tr_7119_1st_15849.csv 10 | 082_LTDB_id_4_Medical_tr_4456_1st_4556.csv 11 | 090_SVDB_id_7_Medical_tr_12157_1st_12257.csv 12 | 107_SVDB_id_24_Medical_tr_32805_1st_32905.csv 13 | 113_SVDB_id_30_Medical_tr_4552_1st_4652.csv 14 | 120_TAO_id_5_Environment_tr_500_1st_3.csv 15 | 126_TAO_id_11_Environment_tr_500_1st_7.csv 16 | 131_OPPORTUNITY_id_3_HumanActivity_tr_7016_1st_26691.csv 17 | 140_CATSv2_id_3_Sensor_tr_28307_1st_28407.csv 18 | 149_SMAP_id_6_Sensor_tr_2128_1st_5000.csv 19 | 164_SMAP_id_21_Sensor_tr_1976_1st_4200.csv 20 | 178_Exathlon_id_5_Facility_tr_12538_1st_12638.csv 21 | 195_Exathlon_id_22_Facility_tr_10766_1st_12590.csv -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-U-Tuning.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 003_NAB_id_3_WebService_tr_1362_1st_1462.csv 3 | 004_NAB_id_4_Facility_tr_1007_1st_1437.csv 4 | 007_NAB_id_7_Traffic_tr_624_1st_2087.csv 5 | 012_NAB_id_12_Synthetic_tr_1007_1st_2787.csv 6 | 021_NAB_id_21_WebService_tr_500_1st_565.csv 7 | 033_WSD_id_5_WebService_tr_4559_1st_12588.csv 8 | 038_WSD_id_10_WebService_tr_4042_1st_4142.csv 9 | 045_WSD_id_17_WebService_tr_2566_1st_2666.csv 10 | 053_WSD_id_25_WebService_tr_4559_1st_9198.csv 11 | 070_WSD_id_42_WebService_tr_2102_1st_2202.csv 12 | 142_MSL_id_3_Sensor_tr_1525_1st_4575.csv 13 | 146_MSL_id_7_Sensor_tr_554_1st_1172.csv 14 | 152_Stock_id_4_Finance_tr_500_1st_2.csv 15 | 161_Stock_id_13_Finance_tr_500_1st_3.csv 16 | 
174_MITDB_id_5_Medical_tr_50000_1st_539948.csv 17 | 182_SMD_id_5_Facility_tr_7174_1st_21230.csv 18 | 186_SMD_id_9_Facility_tr_5925_1st_17580.csv 19 | 193_SMD_id_16_Facility_tr_5925_1st_17580.csv 20 | 200_SMD_id_23_Facility_tr_7174_1st_21230.csv 21 | 210_SMD_id_33_Facility_tr_5925_1st_17580.csv 22 | 217_LTDB_id_2_Medical_tr_500_1st_266.csv 23 | 230_MGAB_id_6_Synthetic_tr_25000_1st_42441.csv 24 | 235_SED_id_2_Medical_tr_2499_1st_3840.csv 25 | 242_SVDB_id_6_Medical_tr_10726_1st_10826.csv 26 | 252_SVDB_id_16_Medical_tr_12167_1st_12267.csv 27 | 259_TAO_id_3_Environment_tr_500_1st_7.csv 28 | 265_IOPS_id_6_WebService_tr_6453_1st_6553.csv 29 | 270_IOPS_id_11_WebService_tr_5638_1st_5738.csv 30 | 280_NEK_id_4_WebService_tr_500_1st_231.csv 31 | 291_TODS_id_5_Synthetic_tr_500_1st_11.csv 32 | 300_TODS_id_14_Synthetic_tr_1250_1st_2555.csv 33 | 305_UCR_id_3_Medical_tr_3000_1st_5948.csv 34 | 312_UCR_id_10_Facility_tr_19948_1st_52000.csv 35 | 323_UCR_id_21_HumanActivity_tr_48812_1st_128430.csv 36 | 347_UCR_id_45_Sensor_tr_2851_1st_5365.csv 37 | 386_UCR_id_84_Environment_tr_2046_1st_5703.csv 38 | 429_UCR_id_127_Medical_tr_14825_1st_29000.csv 39 | 535_SMAP_id_5_Sensor_tr_1113_1st_1890.csv 40 | 543_SMAP_id_13_Sensor_tr_2020_1st_4550.csv 41 | 560_YAHOO_id_10_Synthetic_tr_500_1st_893.csv 42 | 568_YAHOO_id_18_WebService_tr_500_1st_333.csv 43 | 573_YAHOO_id_23_Synthetic_tr_500_1st_623.csv 44 | 583_YAHOO_id_33_WebService_tr_500_1st_1616.csv 45 | 643_YAHOO_id_93_WebService_tr_500_1st_1038.csv 46 | 813_Exathlon_id_4_Facility_tr_10766_1st_12590.csv 47 | 825_Exathlon_id_16_Facility_tr_10766_1st_12590.csv 48 | 852_OPPORTUNITY_id_11_HumanActivity_tr_500_1st_566.csv 49 | 864_OPPORTUNITY_id_23_HumanActivity_tr_895_1st_995.csv -------------------------------------------------------------------------------- /Datasets/README.md: -------------------------------------------------------------------------------- 1 | Example time series are provided in the `TSB-AD-U/M` folders. 2 | 3 | Links to the datasets: 4 | 5 | * TSB-AD-U: https://www.thedatum.org/datasets/TSB-AD-U.zip 6 | 7 | * TSB-AD-M: https://www.thedatum.org/datasets/TSB-AD-M.zip 8 | 9 | > Disclaimer: The dataset is released for reproducibility purposes. The preprocessing and curation steps are provided under the Apache 2.0 license. If you use any of these datasets in your research, please refer to the original data source. License information for each dataset included in TSB-AD is provided at [[Link]](https://thedatumorg.github.io/TSB-AD/) for your reference. 10 | 11 | * File Name Formatting: [index]\_[Dataset Name]\_id\_[id]\_[Domain]\_tr\_[Train Index]\_1st\_[First Anomaly Index].csv 12 | * Domain ⊆ {Web Service, Sensor, Environment, Traffic, Finance, Facility, Medical, Synthetic} 13 | * Folder Description: `TSB-AD-U/M` contain univariate and multivariate time series, respectively. `File_List` contains the file lists used to split the data for evaluation and hyperparameter tuning.
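(Editorial example.) The naming rule above can be parsed mechanically; a minimal sketch follows. The helper name `parse_tsb_filename` is hypothetical, but the train-index field is the same one `TSB_AD/main.py` recovers via `filename.split('.')[0].split('_')[-3]`, and the sketch assumes the dataset name itself contains no underscore (true for the files listed here).

def parse_tsb_filename(filename):
    # '057_SMD_id_1_Facility_tr_4529_1st_4629.csv' -> fields of the naming rule
    parts = filename.split('.')[0].split('_')
    return {
        'index': int(parts[0]),
        'dataset': parts[1],
        'id': int(parts[3]),
        'domain': parts[4],
        'train_index': int(parts[-3]),          # used to cut the training split
        'first_anomaly_index': int(parts[-1]),
    }

parse_tsb_filename('057_SMD_id_1_Facility_tr_4529_1st_4629.csv')
# {'index': 57, 'dataset': 'SMD', 'id': 1, 'domain': 'Facility',
#  'train_index': 4529, 'first_anomaly_index': 4629}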
14 | -------------------------------------------------------------------------------- /TSB_AD/HP_list.py: -------------------------------------------------------------------------------- 1 | 2 | Multi_algo_HP_dict = { 3 | 'IForest': { 4 | 'n_estimators': [25, 50, 100, 150, 200], 5 | 'max_features': [0.2, 0.4, 0.6, 0.8, 1.0] 6 | }, 7 | 'LOF': { 8 | 'n_neighbors': [10, 20, 30, 40, 50], 9 | 'metric': ['minkowski', 'manhattan', 'euclidean'] 10 | }, 11 | 'PCA': { 12 | 'n_components': [0.25, 0.5, 0.75, None] 13 | }, 14 | 'HBOS': { 15 | 'n_bins': [5, 10, 20, 30, 40], 16 | 'tol': [0.1, 0.3, 0.5, 0.7] 17 | }, 18 | 'OCSVM': { 19 | 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 20 | 'nu': [0.1, 0.3, 0.5, 0.7] 21 | }, 22 | 'MCD': { 23 | 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] 24 | }, 25 | 'KNN': { 26 | 'n_neighbors': [10, 20, 30, 40, 50], 27 | 'method': ['largest', 'mean', 'median'] 28 | }, 29 | 'KMeansAD': { 30 | 'n_clusters': [10, 20, 30, 40], 31 | 'window_size': [10, 20, 30, 40] 32 | }, 33 | 'COPOD': { 34 | 'HP': [None] 35 | }, 36 | 'CBLOF': { 37 | 'n_clusters': [4, 8, 16, 32], 38 | 'alpha': [0.6, 0.7, 0.8, 0.9] 39 | }, 40 | 'EIF': { 41 | 'n_trees': [25, 50, 100, 200] 42 | }, 43 | 'RobustPCA': { 44 | 'max_iter': [500, 1000, 1500] 45 | }, 46 | 'AutoEncoder': { 47 | 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] 48 | }, 49 | 'CNN': { 50 | 'window_size': [50, 100, 150], 51 | 'num_channel': [[32, 32, 40], [16, 32, 64]] 52 | }, 53 | 'LSTMAD': { 54 | 'window_size': [50, 100, 150], 55 | 'lr': [0.0004, 0.0008] 56 | }, 57 | 'TranAD': { 58 | 'win_size': [5, 10, 50], 59 | 'lr': [1e-3, 1e-4] 60 | }, 61 | 'AnomalyTransformer': { 62 | 'win_size': [50, 100, 150], 63 | 'lr': [1e-3, 1e-4, 1e-5] 64 | }, 65 | 'OmniAnomaly': { 66 | 'win_size': [5, 50, 100], 67 | 'lr': [0.002, 0.0002] 68 | }, 69 | 'USAD': { 70 | 'win_size': [5, 50, 100], 71 | 'lr': [1e-3, 1e-4, 1e-5] 72 | }, 73 | 'Donut': { 74 | 'win_size': [60, 90, 120], 75 | 'lr': [1e-3, 1e-4, 1e-5] 76 | }, 77 | 'TimesNet': { 78 | 'win_size': [32, 96, 192], 79 | 'lr': [1e-3, 1e-4, 1e-5] 80 | }, 81 | 'FITS': { 82 | 'win_size': [100, 200], 83 | 'lr': [1e-3, 1e-4, 1e-5] 84 | }, 85 | 'OFA': { 86 | 'win_size': [50, 100, 150] 87 | } 88 | } 89 | 90 | 91 | Optimal_Multi_algo_HP_dict = { 92 | 'IForest': {'n_estimators': 25, 'max_features': 0.8}, 93 | 'LOF': {'n_neighbors': 50, 'metric': 'euclidean'}, 94 | 'PCA': {'n_components': 0.25}, 95 | 'HBOS': {'n_bins': 30, 'tol': 0.5}, 96 | 'OCSVM': {'kernel': 'rbf', 'nu': 0.1}, 97 | 'MCD': {'support_fraction': 0.8}, 98 | 'KNN': {'n_neighbors': 50, 'method': 'mean'}, 99 | 'KMeansAD': {'n_clusters': 10, 'window_size': 40}, 100 | 'KShapeAD': {'n_clusters': 20, 'window_size': 40}, 101 | 'COPOD': {'n_jobs':1}, 102 | 'CBLOF': {'n_clusters': 4, 'alpha': 0.6}, 103 | 'EIF': {'n_trees': 50}, 104 | 'RobustPCA': {'max_iter': 1000}, 105 | 'AutoEncoder': {'hidden_neurons': [128, 64]}, 106 | 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, 107 | 'LSTMAD': {'window_size': 150, 'lr': 0.0008}, 108 | 'TranAD': {'win_size': 10, 'lr': 0.001}, 109 | 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, 110 | 'OmniAnomaly': {'win_size': 100, 'lr': 0.002}, 111 | 'USAD': {'win_size': 100, 'lr': 0.001}, 112 | 'Donut': {'win_size': 60, 'lr': 0.001}, 113 | 'TimesNet': {'win_size': 96, 'lr': 0.0001}, 114 | 'FITS': {'win_size': 100, 'lr': 0.001}, 115 | 'OFA': {'win_size': 50} 116 | } 117 | 118 | 119 | Uni_algo_HP_dict = { 120 | 'Sub_IForest': { 121 | 'periodicity': [1, 2, 3], 122 | 'n_estimators': [25, 50, 100, 150, 200] 123 | }, 124 | 'IForest': 
{ 125 | 'n_estimators': [25, 50, 100, 150, 200] 126 | }, 127 | 'Sub_LOF': { 128 | 'periodicity': [1, 2, 3], 129 | 'n_neighbors': [10, 20, 30, 40, 50] 130 | }, 131 | 'LOF': { 132 | 'n_neighbors': [10, 20, 30, 40, 50] 133 | }, 134 | 'POLY': { 135 | 'periodicity': [1, 2, 3], 136 | 'power': [1, 2, 3, 4] 137 | }, 138 | 'MatrixProfile': { 139 | 'periodicity': [1, 2, 3] 140 | }, 141 | 'NORMA': { 142 | 'periodicity': [1, 2, 3], 143 | 'clustering': ['hierarchical', 'kshape'] 144 | }, 145 | 'SAND': { 146 | 'periodicity': [1, 2, 3] 147 | }, 148 | 'Series2Graph': { 149 | 'periodicity': [1, 2, 3] 150 | }, 151 | 'Sub_PCA': { 152 | 'periodicity': [1, 2, 3], 153 | 'n_components': [0.25, 0.5, 0.75, None] 154 | }, 155 | 'Sub_HBOS': { 156 | 'periodicity': [1, 2, 3], 157 | 'n_bins': [5, 10, 20, 30, 40] 158 | }, 159 | 'Sub_OCSVM': { 160 | 'periodicity': [1, 2, 3], 161 | 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'] 162 | }, 163 | 'Sub_MCD': { 164 | 'periodicity': [1, 2, 3], 165 | 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] 166 | }, 167 | 'Sub_KNN': { 168 | 'periodicity': [1, 2, 3], 169 | 'n_neighbors': [10, 20, 30, 40, 50], 170 | }, 171 | 'KMeansAD_U': { 172 | 'periodicity': [1, 2, 3], 173 | 'n_clusters': [10, 20, 30, 40], 174 | }, 175 | 'KShapeAD': { 176 | 'periodicity': [1, 2, 3] 177 | }, 178 | 'AutoEncoder': { 179 | 'window_size': [50, 100, 150], 180 | 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] 181 | }, 182 | 'CNN': { 183 | 'window_size': [50, 100, 150], 184 | 'num_channel': [[32, 32, 40], [16, 32, 64]] 185 | }, 186 | 'LSTMAD': { 187 | 'window_size': [50, 100, 150], 188 | 'lr': [0.0004, 0.0008] 189 | }, 190 | 'TranAD': { 191 | 'win_size': [5, 10, 50], 192 | 'lr': [1e-3, 1e-4] 193 | }, 194 | 'AnomalyTransformer': { 195 | 'win_size': [50, 100, 150], 196 | 'lr': [1e-3, 1e-4, 1e-5] 197 | }, 198 | 'OmniAnomaly': { 199 | 'win_size': [5, 50, 100], 200 | 'lr': [0.002, 0.0002] 201 | }, 202 | 'USAD': { 203 | 'win_size': [5, 50, 100], 204 | 'lr': [1e-3, 1e-4, 1e-5] 205 | }, 206 | 'Donut': { 207 | 'win_size': [60, 90, 120], 208 | 'lr': [1e-3, 1e-4, 1e-5] 209 | }, 210 | 'TimesNet': { 211 | 'win_size': [32, 96, 192], 212 | 'lr': [1e-3, 1e-4, 1e-5] 213 | }, 214 | 'FITS': { 215 | 'win_size': [100, 200], 216 | 'lr': [1e-3, 1e-4, 1e-5] 217 | }, 218 | 'OFA': { 219 | 'win_size': [50, 100, 150] 220 | }, 221 | 'Lag_Llama': { 222 | 'win_size': [32, 64, 96] 223 | }, 224 | 'Chronos': { 225 | 'win_size': [50, 100, 150] 226 | }, 227 | 'TimesFM': { 228 | 'win_size': [32, 64, 96] 229 | }, 230 | 'MOMENT_ZS': { 231 | 'win_size': [64, 128, 256] 232 | }, 233 | 'MOMENT_FT': { 234 | 'win_size': [64, 128, 256] 235 | } 236 | } 237 | 238 | Optimal_Uni_algo_HP_dict = { 239 | 'Sub_IForest': {'periodicity': 1, 'n_estimators': 150}, 240 | 'IForest': {'n_estimators': 200}, 241 | 'Sub_LOF': {'periodicity': 2, 'n_neighbors': 30}, 242 | 'LOF': {'n_neighbors': 50}, 243 | 'POLY': {'periodicity': 1, 'power': 4}, 244 | 'MatrixProfile': {'periodicity': 1}, 245 | 'NORMA': {'periodicity': 1, 'clustering': 'kshape'}, 246 | 'SAND': {'periodicity': 1}, 247 | 'Series2Graph': {'periodicity': 1}, 248 | 'SR': {'periodicity': 1}, 249 | 'Sub_PCA': {'periodicity': 1, 'n_components': None}, 250 | 'Sub_HBOS': {'periodicity': 1, 'n_bins': 10}, 251 | 'Sub_OCSVM': {'periodicity': 2, 'kernel': 'rbf'}, 252 | 'Sub_MCD': {'periodicity': 3, 'support_fraction': None}, 253 | 'Sub_KNN': {'periodicity': 2, 'n_neighbors': 50}, 254 | 'KMeansAD_U': {'periodicity': 2, 'n_clusters': 10}, 255 | 'KShapeAD': {'periodicity': 1}, 256 | 'FFT': {}, 257 | 'Left_STAMPi': {}, 258 | 
'AutoEncoder': {'window_size': 100, 'hidden_neurons': [128, 64]}, 259 | 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, 260 | 'LSTMAD': {'window_size': 100, 'lr': 0.0008}, 261 | 'TranAD': {'win_size': 10, 'lr': 0.0001}, 262 | 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, 263 | 'OmniAnomaly': {'win_size': 5, 'lr': 0.002}, 264 | 'USAD': {'win_size': 100, 'lr': 0.001}, 265 | 'Donut': {'win_size': 60, 'lr': 0.0001}, 266 | 'TimesNet': {'win_size': 32, 'lr': 0.0001}, 267 | 'FITS': {'win_size': 100, 'lr': 0.0001}, 268 | 'OFA': {'win_size': 50}, 269 | 'Lag_Llama': {'win_size': 96}, 270 | 'Chronos': {'win_size': 100}, 271 | 'TimesFM': {'win_size': 96}, 272 | 'MOMENT_ZS': {'win_size': 64}, 273 | 'MOMENT_FT': {'win_size': 64}, 274 | 'M2N2': {} 275 | } -------------------------------------------------------------------------------- /TSB_AD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/__init__.py -------------------------------------------------------------------------------- /TSB_AD/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/_affiliation_zone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from ._integral_interval import interval_intersection 4 | 5 | def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 6 | """ 7 | Helper for `E_gt_func` 8 | 9 | :param j: index from 0 to len(Js) (included) on which to get the start 10 | :param Js: ground truth events, as a list of couples 11 | :param Trange: range of the series where Js is included 12 | :return: generalized start such that the middle of t_start and t_stop 13 | always gives the affiliation zone 14 | """ 15 | b = max(Trange) 16 | n = len(Js) 17 | if j == n: 18 | return(2*b - t_stop(n-1, Js, Trange)) 19 | else: 20 | return(Js[j][0]) 21 | 22 | def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 23 | """ 24 | Helper for `E_gt_func` 25 | 26 | :param j: index from 0 to len(Js) (included) on which to get the stop 27 | :param Js: ground truth events, as a list of couples 28 | :param Trange: range of the series where Js is included 29 | :return: generalized stop such that the middle of t_start and t_stop 30 | always gives the affiliation zone 31 | """ 32 | if j == -1: 33 | a = min(Trange) 34 | return(2*a - t_start(0, Js, Trange)) 35 | else: 36 | return(Js[j][1]) 37 | 38 | def E_gt_func(j, Js, Trange): 39 | """ 40 | Get the affiliation zone of element j of the ground truth 41 | 42 | :param j: index from 0 to len(Js) (excluded) on which to get the zone 43 | :param Js: ground truth events, as a list of couples 44 | :param Trange: range of the series where Js is included, can 45 | be (-math.inf, math.inf) for distance measures 46 | :return: affiliation zone of element j of the ground truth represented 47 | as a couple 48 | """ 49 | range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2 50 | range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2 51 | return((range_left, range_right)) 52 
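(Editorial illustration, not part of the original module.) For the default example used in this file, `E_gt_func` gives each ground-truth event an affiliation zone bounded by the midpoints to its neighbouring events, with the outermost bounds reflected around `Trange`; a small sketch, assuming the package layout shown in this repository:

from TSB_AD.evaluation.affiliation._affiliation_zone import E_gt_func

Js = [(1, 2), (3, 4), (5, 6)]   # ground-truth events
Trange = (1, 10)                # range of the whole series
zones = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
# zones == [(1.0, 2.5), (2.5, 4.5), (4.5, 10.0)]
# e.g. 2.5 is the midpoint between the stop of (1, 2) and the start of (3, 4);
# 10.0 is the right end of Trange, obtained by reflecting the last stop around it.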
| 53 | def get_all_E_gt_func(Js, Trange): 54 | """ 55 | Get the affiliation partition from the ground truth point of view 56 | 57 | :param Js: ground truth events, as a list of couples 58 | :param Trange: range of the series where Js is included, can 59 | be (-math.inf, math.inf) for distance measures 60 | :return: affiliation partition of the events 61 | """ 62 | # E_gt is the limit of affiliation/attraction for each ground truth event 63 | E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))] 64 | return(E_gt) 65 | 66 | def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]): 67 | """ 68 | Cut the events into the affiliation zones 69 | The presentation given here is from the ground truth point of view, 70 | but it is also used in the reversed direction in the main function. 71 | 72 | :param Is: events as a list of couples 73 | :param E_gt: range of the affiliation zones 74 | :return: a list of list of intervals (each interval represented by either 75 | a couple or None for empty interval). The outer list is indexed by each 76 | affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`. 77 | """ 78 | out = [None] * len(E_gt) 79 | for j in range(len(E_gt)): 80 | E_gt_j = E_gt[j] 81 | discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E 82 | discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E 83 | kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)] 84 | Is_j = [x for x, y in zip(Is, kept_index)] 85 | out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j] 86 | return(out) 87 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/_single_ground_truth_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import math 4 | from ._affiliation_zone import ( 5 | get_all_E_gt_func, 6 | affiliation_partition) 7 | from ._integral_interval import ( 8 | integral_interval_distance, 9 | integral_interval_probaCDF_precision, 10 | integral_interval_probaCDF_recall, 11 | interval_length, 12 | sum_interval_lengths) 13 | 14 | def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 15 | """ 16 | Compute the individual average distance from Is to a single ground truth J 17 | 18 | :param Is: list of predicted events within the affiliation zone of J 19 | :param J: couple representating the start and stop of a ground truth interval 20 | :return: individual average precision directed distance number 21 | """ 22 | if all([I is None for I in Is]): # no prediction in the current area 23 | return(math.nan) # undefined 24 | return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is)) 25 | 26 | def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 27 | """ 28 | Compute the individual precision probability from Is to a single ground truth J 29 | 30 | :param Is: list of predicted events within the affiliation zone of J 31 | :param J: couple representating the start and stop of a ground truth interval 32 | :param E: couple representing the start and stop of the zone of affiliation of J 33 | :return: individual precision probability in [0, 1], or math.nan if undefined 34 | """ 35 | if all([I is None for I in Is]): # no prediction in the current area 36 | return(math.nan) # undefined 37 | 
return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is)) 38 | 39 | def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 40 | """ 41 | Compute the individual average distance from a single J to the predictions Is 42 | 43 | :param Is: list of predicted events within the affiliation zone of J 44 | :param J: couple representating the start and stop of a ground truth interval 45 | :return: individual average recall directed distance number 46 | """ 47 | Is = [I for I in Is if I is not None] # filter possible None in Is 48 | if len(Is) == 0: # there is no prediction in the current area 49 | return(math.inf) 50 | E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions 51 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 52 | return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J)) 53 | 54 | def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 55 | """ 56 | Compute the individual recall probability from a single ground truth J to Is 57 | 58 | :param Is: list of predicted events within the affiliation zone of J 59 | :param J: couple representating the start and stop of a ground truth interval 60 | :param E: couple representing the start and stop of the zone of affiliation of J 61 | :return: individual recall probability in [0, 1] 62 | """ 63 | Is = [I for I in Is if I is not None] # filter possible None in Is 64 | if len(Is) == 0: # there is no prediction in the current area 65 | return(0) 66 | E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions 67 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 68 | return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J)) 69 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from itertools import groupby 4 | from operator import itemgetter 5 | import math 6 | import gzip 7 | import glob 8 | import os 9 | 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]): 11 | """ 12 | Convert a binary vector (indicating 1 for the anomalous instances) 13 | to a list of events. The events are considered as durations, 14 | i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1). 15 | 16 | :param vector: a list of elements belonging to {0, 1} 17 | :return: a list of couples, each couple representing the start and stop of 18 | each event 19 | """ 20 | positive_indexes = [idx for idx, val in enumerate(vector) if val > 0] 21 | events = [] 22 | for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]): 23 | cur_cut = list(map(itemgetter(1), g)) 24 | events.append((cur_cut[0], cur_cut[-1])) 25 | 26 | # Consistent conversion in case of range anomalies (for indexes): 27 | # A positive index i is considered as the interval [i, i+1), 28 | # so the last index should be moved by 1 29 | events = [(x, y+1) for (x,y) in events] 30 | 31 | return(events) 32 | 33 | def infer_Trange(events_pred, events_gt): 34 | """ 35 | Given the list of events events_pred and events_gt, get the 36 | smallest possible Trange corresponding to the start and stop indexes 37 | of the whole series. 
38 | Trange will not influence the measure of distances, but will impact the 39 | measures of probabilities. 40 | 41 | :param events_pred: a list of couples corresponding to predicted events 42 | :param events_gt: a list of couples corresponding to ground truth events 43 | :return: a couple corresponding to the smallest range containing the events 44 | """ 45 | if len(events_gt) == 0: 46 | raise ValueError('The gt events should contain at least one event') 47 | if len(events_pred) == 0: 48 | # empty prediction, base Trange only on events_gt (which is non empty) 49 | return(infer_Trange(events_gt, events_gt)) 50 | 51 | min_pred = min([x[0] for x in events_pred]) 52 | min_gt = min([x[0] for x in events_gt]) 53 | max_pred = max([x[1] for x in events_pred]) 54 | max_gt = max([x[1] for x in events_gt]) 55 | Trange = (min(min_pred, min_gt), max(max_pred, max_gt)) 56 | return(Trange) 57 | 58 | def has_point_anomalies(events): 59 | """ 60 | Checking whether events contain point anomalies, i.e. 61 | events starting and stopping at the same time. 62 | 63 | :param events: a list of couples corresponding to predicted events 64 | :return: True is the events have any point anomalies, False otherwise 65 | """ 66 | if len(events) == 0: 67 | return(False) 68 | return(min([x[1] - x[0] for x in events]) == 0) 69 | 70 | def _sum_wo_nan(vec): 71 | """ 72 | Sum of elements, ignoring math.isnan ones 73 | 74 | :param vec: vector of floating numbers 75 | :return: sum of the elements, ignoring math.isnan ones 76 | """ 77 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 78 | return(sum(vec_wo_nan)) 79 | 80 | def _len_wo_nan(vec): 81 | """ 82 | Count of elements, ignoring math.isnan ones 83 | 84 | :param vec: vector of floating numbers 85 | :return: count of the elements, ignoring math.isnan ones 86 | """ 87 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 88 | return(len(vec_wo_nan)) 89 | 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'): 91 | """ 92 | Load a file compressed with gz, such that each line of the 93 | file is either 0 (representing a normal instance) or 1 (representing) 94 | an anomalous instance. 95 | :param filename: file path to the gz compressed file 96 | :return: list of integers with either 0 or 1 97 | """ 98 | with gzip.open(filename, 'rb') as f: 99 | content = f.read().splitlines() 100 | content = [int(x) for x in content] 101 | return(content) 102 | 103 | def read_all_as_events(): 104 | """ 105 | Load the files contained in the folder `data/` and convert 106 | to events. The length of the series is kept. 
107 | The convention for the file name is: `dataset_algorithm.gz` 108 | :return: two dictionaries: 109 | - the first containing the list of events for each dataset and algorithm, 110 | - the second containing the range of the series for each dataset 111 | """ 112 | filepaths = glob.glob('data/*.gz') 113 | datasets = dict() 114 | Tranges = dict() 115 | for filepath in filepaths: 116 | vector = read_gz_data(filepath) 117 | events = convert_vector_to_events(vector) 118 | # ad hoc cut for those files 119 | cut_filepath = (os.path.split(filepath)[1]).split('_') 120 | data_name = cut_filepath[0] 121 | algo_name = (cut_filepath[1]).split('.')[0] 122 | if not data_name in datasets: 123 | datasets[data_name] = dict() 124 | Tranges[data_name] = (0, len(vector)) 125 | datasets[data_name][algo_name] = events 126 | return(datasets, Tranges) 127 | 128 | def f1_func(p, r): 129 | """ 130 | Compute the f1 function 131 | :param p: precision numeric value 132 | :param r: recall numeric value 133 | :return: f1 numeric value 134 | """ 135 | return(2*p*r/(p+r)) 136 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from .generics import ( 4 | infer_Trange, 5 | has_point_anomalies, 6 | _len_wo_nan, 7 | _sum_wo_nan, 8 | read_all_as_events) 9 | from ._affiliation_zone import ( 10 | get_all_E_gt_func, 11 | affiliation_partition) 12 | from ._single_ground_truth_event import ( 13 | affiliation_precision_distance, 14 | affiliation_recall_distance, 15 | affiliation_precision_proba, 16 | affiliation_recall_proba) 17 | 18 | def test_events(events): 19 | """ 20 | Verify the validity of the input events 21 | :param events: list of events, each represented by a couple (start, stop) 22 | :return: None. 
Raise an error for incorrect formed or non ordered events 23 | """ 24 | if type(events) is not list: 25 | raise TypeError('Input `events` should be a list of couples') 26 | if not all([type(x) is tuple for x in events]): 27 | raise TypeError('Input `events` should be a list of tuples') 28 | if not all([len(x) == 2 for x in events]): 29 | raise ValueError('Input `events` should be a list of couples (start, stop)') 30 | if not all([x[0] <= x[1] for x in events]): 31 | raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop') 32 | if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]): 33 | raise ValueError('Couples of input `events` should be disjoint and ordered') 34 | 35 | def pr_from_events(events_pred, events_gt, Trange): 36 | """ 37 | Compute the affiliation metrics including the precision/recall in [0,1], 38 | along with the individual precision/recall distances and probabilities 39 | 40 | :param events_pred: list of predicted events, each represented by a couple 41 | indicating the start and the stop of the event 42 | :param events_gt: list of ground truth events, each represented by a couple 43 | indicating the start and the stop of the event 44 | :param Trange: range of the series where events_pred and events_gt are included, 45 | represented as a couple (start, stop) 46 | :return: dictionary with precision, recall, and the individual metrics 47 | """ 48 | # testing the inputs 49 | test_events(events_pred) 50 | test_events(events_gt) 51 | 52 | # other tests 53 | minimal_Trange = infer_Trange(events_pred, events_gt) 54 | if not Trange[0] <= minimal_Trange[0]: 55 | raise ValueError('`Trange` should include all the events') 56 | if not minimal_Trange[1] <= Trange[1]: 57 | raise ValueError('`Trange` should include all the events') 58 | 59 | if len(events_gt) == 0: 60 | raise ValueError('Input `events_gt` should have at least one event') 61 | 62 | if has_point_anomalies(events_pred) or has_point_anomalies(events_gt): 63 | raise ValueError('Cannot manage point anomalies currently') 64 | 65 | if Trange is None: 66 | # Set as default, but Trange should be indicated if probabilities are used 67 | raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function') 68 | 69 | E_gt = get_all_E_gt_func(events_gt, Trange) 70 | aff_partition = affiliation_partition(events_pred, E_gt) 71 | 72 | # Computing precision distance 73 | d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 74 | 75 | # Computing recall distance 76 | d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 77 | 78 | # Computing precision 79 | p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 80 | 81 | # Computing recall 82 | p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 83 | 84 | if _len_wo_nan(p_precision) > 0: 85 | p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision) 86 | else: 87 | p_precision_average = p_precision[0] # math.nan 88 | p_recall_average = sum(p_recall) / len(p_recall) 89 | 90 | dict_out = dict({'Affiliation_Precision': p_precision_average, 91 | 'Affiliation_Recall': p_recall_average, 92 | 'individual_precision_probabilities': p_precision, 93 | 'individual_recall_probabilities': p_recall, 94 | 'individual_precision_distances': d_precision, 95 | 'individual_recall_distances': d_recall}) 96 | return(dict_out) 97 | 98 | def 
produce_all_results(): 99 | """ 100 | Produce the affiliation precision/recall for all files 101 | contained in the `data` repository 102 | :return: a dictionary indexed by data names, each containing a dictionary 103 | indexed by algorithm names, each containing the results of the affiliation 104 | metrics (precision, recall, individual probabilities and distances) 105 | """ 106 | datasets, Tranges = read_all_as_events() # read all the events in folder `data` 107 | results = dict() 108 | for data_name in datasets.keys(): 109 | results_data = dict() 110 | for algo_name in datasets[data_name].keys(): 111 | if algo_name != 'groundtruth': 112 | results_data[algo_name] = pr_from_events(datasets[data_name][algo_name], 113 | datasets[data_name]['groundtruth'], 114 | Tranges[data_name]) 115 | results[data_name] = results_data 116 | return(results) 117 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | from .basic_metrics import basic_metricor, generate_curve 2 | 3 | def get_metrics(score, labels, slidingWindow=100, pred=None, version='opt', thre=250): 4 | metrics = {} 5 | 6 | ''' 7 | Threshold Independent 8 | ''' 9 | grader = basic_metricor() 10 | # AUC_ROC, Precision, Recall, PointF1, PointF1PA, Rrecall, ExistenceReward, OverlapReward, Rprecision, RF, Precision_at_k = grader.metric_new(labels, score, pred, plot_ROC=False) 11 | AUC_ROC = grader.metric_ROC(labels, score) 12 | AUC_PR = grader.metric_PR(labels, score) 13 | 14 | # R_AUC_ROC, R_AUC_PR, _, _, _ = grader.RangeAUC(labels=labels, score=score, window=slidingWindow, plot_ROC=True) 15 | _, _, _, _, _, _,VUS_ROC, VUS_PR = generate_curve(labels.astype(int), score, slidingWindow, version, thre) 16 | 17 | 18 | ''' 19 | Threshold Dependent 20 | if pred is None --> use the oracle threshold 21 | ''' 22 | 23 | PointF1 = grader.metric_PointF1(labels, score, preds=pred) 24 | PointF1PA = grader.metric_PointF1PA(labels, score, preds=pred) 25 | EventF1PA = grader.metric_EventF1PA(labels, score, preds=pred) 26 | RF1 = grader.metric_RF1(labels, score, preds=pred) 27 | Affiliation_F = grader.metric_Affiliation(labels, score, preds=pred) 28 | 29 | metrics['AUC-PR'] = AUC_PR 30 | metrics['AUC-ROC'] = AUC_ROC 31 | metrics['VUS-PR'] = VUS_PR 32 | metrics['VUS-ROC'] = VUS_ROC 33 | 34 | metrics['Standard-F1'] = PointF1 35 | metrics['PA-F1'] = PointF1PA 36 | metrics['Event-based-F1'] = EventF1PA 37 | metrics['R-based-F1'] = RF1 38 | metrics['Affiliation-F'] = Affiliation_F 39 | return metrics 40 | 41 | 42 | def get_metrics_pred(score, labels, pred, slidingWindow=100): 43 | metrics = {} 44 | 45 | grader = basic_metricor() 46 | 47 | PointF1 = grader.metric_PointF1(labels, score, preds=pred) 48 | PointF1PA = grader.metric_PointF1PA(labels, score, preds=pred) 49 | EventF1PA = grader.metric_EventF1PA(labels, score, preds=pred) 50 | RF1 = grader.metric_RF1(labels, score, preds=pred) 51 | Affiliation_F = grader.metric_Affiliation(labels, score, preds=pred) 52 | VUS_R, VUS_P, VUS_F = grader.metric_VUS_pred(labels, preds=pred, windowSize=slidingWindow) 53 | 54 | metrics['Standard-F1'] = PointF1 55 | metrics['PA-F1'] = PointF1PA 56 | metrics['Event-based-F1'] = EventF1PA 57 | metrics['R-based-F1'] = RF1 58 | metrics['Affiliation-F'] = Affiliation_F 59 | 60 | metrics['VUS-Recall'] = VUS_R 61 | metrics['VUS-Precision'] = VUS_P 62 | metrics['VUS-F'] = VUS_F 63 | 64 | return metrics 65 | 
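(Editorial usage sketch.) `get_metrics` combines threshold-independent scores (AUC/VUS) with threshold-dependent F-scores, so it expects a score vector, the labels, a sliding-window estimate, and optionally binary predictions. The toy arrays below are placeholders; the min-max scaling, window estimation, and three-sigma threshold mirror how `TSB_AD/main.py` calls it:

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from TSB_AD.evaluation.metrics import get_metrics
from TSB_AD.utils.slidingWindows import find_length_rank

rng = np.random.default_rng(0)
t = np.arange(2000)
data = (np.sin(2 * np.pi * t / 50) + 0.1 * rng.normal(size=2000)).reshape(-1, 1)  # toy periodic series
label = np.zeros(2000, dtype=int)
label[1500:1520] = 1                                   # one injected anomaly range
score = 0.1 * rng.random(2000)
score[1500:1520] += 0.9                                # stand-in detector scores, high on the anomaly

score = MinMaxScaler(feature_range=(0, 1)).fit_transform(score.reshape(-1, 1)).ravel()
slidingWindow = find_length_rank(data, rank=1)         # window estimate, as in main.py
pred = score > (np.mean(score) + 3 * np.std(score))    # simple 3-sigma thresholding

results = get_metrics(score, label, slidingWindow=slidingWindow, pred=pred)
# dict with 'AUC-PR', 'AUC-ROC', 'VUS-PR', 'VUS-ROC', 'Standard-F1', 'PA-F1',
# 'Event-based-F1', 'R-based-F1', 'Affiliation-F'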
-------------------------------------------------------------------------------- /TSB_AD/evaluation/visualize.py: -------------------------------------------------------------------------------- 1 | from basic_metrics import metricor 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import matplotlib.patches as mpatches 5 | 6 | def plotFig(data, label, score, slidingWindow, fileName, modelName, plotRange=None): 7 | grader = metricor() 8 | 9 | R_AUC, R_AP, R_fpr, R_tpr, R_prec = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=True) # 10 | 11 | L, fpr, tpr= grader.metric_new(label, score, plot_ROC=True) 12 | precision, recall, AP = grader.metric_PR(label, score) 13 | 14 | range_anomaly = grader.range_convers_new(label) 15 | # print(range_anomaly) 16 | 17 | # max_length = min(len(score),len(data), 20000) 18 | max_length = len(score) 19 | 20 | if plotRange==None: 21 | plotRange = [0,max_length] 22 | 23 | fig3 = plt.figure(figsize=(12, 10), constrained_layout=True) 24 | gs = fig3.add_gridspec(3, 4) 25 | 26 | 27 | f3_ax1 = fig3.add_subplot(gs[0, :-1]) 28 | plt.tick_params(labelbottom=False) 29 | 30 | plt.plot(data[:max_length],'k') 31 | for r in range_anomaly: 32 | if r[0]==r[1]: 33 | plt.plot(r[0],data[r[0]],'r.') 34 | else: 35 | plt.plot(range(r[0],r[1]+1),data[range(r[0],r[1]+1)],'r') 36 | # plt.xlim([0,max_length]) 37 | plt.xlim(plotRange) 38 | 39 | 40 | # L = [auc, precision, recall, f, Rrecall, ExistenceReward, 41 | # OverlapReward, Rprecision, Rf, precision_at_k] 42 | f3_ax2 = fig3.add_subplot(gs[1, :-1]) 43 | # plt.tick_params(labelbottom=False) 44 | L1 = [ '%.2f' % elem for elem in L] 45 | plt.plot(score[:max_length]) 46 | plt.hlines(np.mean(score)+3*np.std(score),0,max_length,linestyles='--',color='red') 47 | plt.ylabel('score') 48 | # plt.xlim([0,max_length]) 49 | plt.xlim(plotRange) 50 | 51 | 52 | #plot the data 53 | f3_ax3 = fig3.add_subplot(gs[2, :-1]) 54 | index = ( label + 2*(score > (np.mean(score)+3*np.std(score)))) 55 | cf = lambda x: 'k' if x==0 else ('r' if x == 1 else ('g' if x == 2 else 'b') ) 56 | cf = np.vectorize(cf) 57 | 58 | color = cf(index[:max_length]) 59 | black_patch = mpatches.Patch(color = 'black', label = 'TN') 60 | red_patch = mpatches.Patch(color = 'red', label = 'FN') 61 | green_patch = mpatches.Patch(color = 'green', label = 'FP') 62 | blue_patch = mpatches.Patch(color = 'blue', label = 'TP') 63 | plt.scatter(np.arange(max_length), data[:max_length], c=color, marker='.') 64 | plt.legend(handles = [black_patch, red_patch, green_patch, blue_patch], loc= 'best') 65 | # plt.xlim([0,max_length]) 66 | plt.xlim(plotRange) 67 | 68 | 69 | f3_ax4 = fig3.add_subplot(gs[0, -1]) 70 | plt.plot(fpr, tpr) 71 | # plt.plot(R_fpr,R_tpr) 72 | # plt.title('R_AUC='+str(round(R_AUC,3))) 73 | plt.xlabel('FPR') 74 | plt.ylabel('TPR') 75 | # plt.legend(['ROC','Range-ROC']) 76 | 77 | # f3_ax5 = fig3.add_subplot(gs[1, -1]) 78 | # plt.plot(recall, precision) 79 | # plt.plot(R_tpr[:-1],R_prec) # I add (1,1) to (TPR, FPR) at the end !!! 
80 | # plt.xlabel('Recall') 81 | # plt.ylabel('Precision') 82 | # plt.legend(['PR','Range-PR']) 83 | 84 | # print('AUC=', L1[0]) 85 | # print('F=', L1[3]) 86 | 87 | plt.suptitle(fileName + ' window='+str(slidingWindow) +' '+ modelName 88 | +'\nAUC='+L1[0]+' R_AUC='+str(round(R_AUC,2))+' Precision='+L1[1]+ ' Recall='+L1[2]+' F='+L1[3] 89 | + ' ExistenceReward='+L1[5]+' OverlapReward='+L1[6] 90 | +'\nAP='+str(round(AP,2))+' R_AP='+str(round(R_AP,2))+' Precision@k='+L1[9]+' Rprecision='+L1[7] + ' Rrecall='+L1[4] +' Rf='+L1[8] 91 | ) 92 | 93 | def printResult(data, label, score, slidingWindow, fileName, modelName): 94 | grader = metricor() 95 | R_AUC = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=False) # 96 | L= grader.metric_new(label, score, plot_ROC=False) 97 | L.append(R_AUC) 98 | return L 99 | -------------------------------------------------------------------------------- /TSB_AD/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import torch 7 | import random, argparse 8 | from sklearn.preprocessing import MinMaxScaler 9 | from .evaluation.metrics import get_metrics 10 | from .utils.slidingWindows import find_length_rank 11 | from .model_wrapper import * 12 | from .HP_list import Optimal_Uni_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA Available: ", torch.cuda.is_available()) 25 | print("cuDNN Version: ", torch.backends.cudnn.version()) 26 | 27 | 28 | if __name__ == '__main__': 29 | 30 | ## ArgumentParser 31 | parser = argparse.ArgumentParser(description='Running TSB-AD') 32 | parser.add_argument('--filename', type=str, default='001_NAB_id_1_Facility_tr_1007_1st_2014.csv') 33 | parser.add_argument('--data_direc', type=str, default='Datasets/TSB-AD-U/') 34 | parser.add_argument('--save', type=bool, default=False) 35 | parser.add_argument('--AD_Name', type=str, default='IForest') 36 | args = parser.parse_args() 37 | 38 | df = pd.read_csv(args.data_direc + args.filename).dropna() 39 | data = df.iloc[:, 0:-1].values.astype(float) 40 | label = df['Label'].astype(int).to_numpy() 41 | 42 | slidingWindow = find_length_rank(data, rank=1) 43 | train_index = args.filename.split('.')[0].split('_')[-3] 44 | data_train = data[:int(train_index), :] 45 | Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name] 46 | 47 | if args.AD_Name in Semisupervise_AD_Pool: 48 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 49 | elif args.AD_Name in Unsupervise_AD_Pool: 50 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 51 | else: 52 | raise Exception(f"{args.AD_Name} is not defined") 53 | 54 | if isinstance(output, np.ndarray): 55 | output = MinMaxScaler(feature_range=(0,1)).fit_transform(output.reshape(-1,1)).ravel() 56 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow, pred=output > (np.mean(output)+3*np.std(output))) 57 | print('Evaluation Result: ', evaluation_result) 58 | else: 59 | print(f'At {args.filename}: '+output) 60 | 61 | -------------------------------------------------------------------------------- /TSB_AD/models/COF.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This function is adapted from [pyod] by [yzhao062] 4 | Original source: [https://github.com/yzhao062/pyod] 5 | """ 6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import warnings 11 | from operator import itemgetter 12 | 13 | import numpy as np 14 | from scipy.spatial import distance_matrix 15 | from scipy.spatial import minkowski_distance 16 | from sklearn.utils import check_array 17 | 18 | from .base import BaseDetector 19 | from ..utils.utility import check_parameter 20 | 21 | 22 | class COF(BaseDetector): 23 | """Connectivity-Based Outlier Factor (COF) COF uses the ratio of average 24 | chaining distance of data point and the average of average chaining 25 | distance of k nearest neighbor of the data point, as the outlier score 26 | for observations. 27 | 28 | See :cite:`tang2002enhancing` for details. 29 | 30 | Two version of COF are supported: 31 | 32 | - Fast COF: computes the entire pairwise distance matrix at the cost of a 33 | O(n^2) memory requirement. 34 | - Memory efficient COF: calculates pairwise distances incrementally. 35 | Use this implementation when it is not feasible to fit the n-by-n 36 | distance in memory. This leads to a linear overhead because many 37 | distances will have to be recalculated. 38 | 39 | Parameters 40 | ---------- 41 | contamination : float in (0., 0.5), optional (default=0.1) 42 | The amount of contamination of the data set, i.e. 43 | the proportion of outliers in the data set. Used when fitting to 44 | define the threshold on the decision function. 45 | 46 | n_neighbors : int, optional (default=20) 47 | Number of neighbors to use by default for k neighbors queries. 48 | Note that n_neighbors should be less than the number of samples. 49 | If n_neighbors is larger than the number of samples provided, 50 | all samples will be used. 51 | 52 | method : string, optional (default='fast') 53 | Valid values for method are: 54 | 55 | - 'fast' Fast COF, computes the full pairwise distance matrix up front. 56 | - 'memory' Memory-efficient COF, computes pairwise distances only when 57 | needed at the cost of computational speed. 58 | 59 | Attributes 60 | ---------- 61 | decision_scores_ : numpy array of shape (n_samples,) 62 | The outlier scores of the training data. 63 | The higher, the more abnormal. Outliers tend to have higher 64 | scores. This value is available once the detector is 65 | fitted. 66 | 67 | threshold_ : float 68 | The threshold is based on ``contamination``. It is the 69 | ``n_samples * contamination`` most abnormal samples in 70 | ``decision_scores_``. The threshold is calculated for generating 71 | binary outlier labels. 72 | 73 | labels_ : int, either 0 or 1 74 | The binary labels of the training data. 0 stands for inliers 75 | and 1 for outliers/anomalies. It is generated by applying 76 | ``threshold_`` on ``decision_scores_``. 77 | 78 | n_neighbors_: int 79 | Number of neighbors to use by default for k neighbors queries. 80 | """ 81 | 82 | def __init__(self, contamination=0.1, n_neighbors=20, method="fast"): 83 | super(COF, self).__init__(contamination=contamination) 84 | if isinstance(n_neighbors, int): 85 | check_parameter(n_neighbors, low=1, param_name='n_neighbors') 86 | else: 87 | raise TypeError( 88 | "n_neighbors should be int. 
Got %s" % type(n_neighbors)) 89 | self.n_neighbors = n_neighbors 90 | self.method = method 91 | 92 | def fit(self, X, y=None): 93 | """Fit detector. y is ignored in unsupervised methods. 94 | 95 | Parameters 96 | ---------- 97 | X : numpy array of shape (n_samples, n_features) 98 | The input samples. 99 | 100 | y : Ignored 101 | Not used, present for API consistency by convention. 102 | 103 | Returns 104 | ------- 105 | self : object 106 | Fitted estimator. 107 | """ 108 | X = check_array(X) 109 | self.n_train_ = X.shape[0] 110 | self.n_neighbors_ = self.n_neighbors 111 | 112 | if self.n_neighbors_ >= self.n_train_: 113 | self.n_neighbors_ = self.n_train_ - 1 114 | warnings.warn( 115 | "n_neighbors is set to the number of training points " 116 | "minus 1: {0}".format(self.n_neighbors_)) 117 | 118 | check_parameter(self.n_neighbors_, 1, self.n_train_, 119 | include_left=True, include_right=True) 120 | 121 | self._set_n_classes(y) 122 | self.decision_scores_ = self.decision_function(X) 123 | self._process_decision_scores() 124 | 125 | return self 126 | 127 | def decision_function(self, X): 128 | """Predict raw anomaly score of X using the fitted detector. 129 | The anomaly score of an input sample is computed based on different 130 | detector algorithms. For consistency, outliers are assigned with 131 | larger anomaly scores. 132 | 133 | Parameters 134 | ---------- 135 | X : numpy array of shape (n_samples, n_features) 136 | The training input samples. Sparse matrices are accepted only 137 | if they are supported by the base estimator. 138 | 139 | Returns 140 | ------- 141 | anomaly_scores : numpy array of shape (n_samples,) 142 | The anomaly score of the input samples. 143 | """ 144 | if self.method.lower() == "fast": 145 | return self._cof_fast(X) 146 | elif self.method.lower() == "memory": 147 | return self._cof_memory(X) 148 | else: 149 | raise ValueError("method should be set to either \'fast\' or \'memory\'. Got %s" % self.method) 150 | 151 | def _cof_memory(self, X): 152 | """ 153 | Connectivity-Based Outlier Factor (COF) Algorithm 154 | This function is called internally to calculate the 155 | Connectivity-Based Outlier Factor (COF) as an outlier 156 | score for observations. 157 | This function uses a memory efficient implementation at the cost of 158 | speed. 159 | :return: numpy array containing COF scores for observations. 160 | The greater the COF, the greater the outlierness. 161 | """ 162 | #dist_matrix = np.array(distance_matrix(X, X)) 163 | sbn_path_index = np.zeros((X.shape[0],self.n_neighbors_), dtype=np.int64) 164 | ac_dist, cof_ = np.zeros((X.shape[0])), np.zeros((X.shape[0])) 165 | for i in range(X.shape[0]): 166 | #sbn_path = np.argsort(dist_matrix[i]) 167 | sbn_path = np.argsort(minkowski_distance(X[i,:],X,p=2)) 168 | sbn_path_index[i,:] = sbn_path[1: self.n_neighbors_ + 1] 169 | cost_desc = np.zeros((self.n_neighbors_)) 170 | for j in range(self.n_neighbors_): 171 | #cost_desc.append( 172 | # np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) 173 | cost_desc[j] = np.min(minkowski_distance(X[sbn_path[j + 1]],X,p=2)[sbn_path][:j + 1]) 174 | acd = np.zeros((self.n_neighbors_)) 175 | for _h, cost_ in enumerate(cost_desc): 176 | neighbor_add1 = self.n_neighbors_ + 1 177 | acd[_h] = ((2. 
* (neighbor_add1 - (_h + 1))) / (neighbor_add1 * self.n_neighbors_)) * cost_ 178 | ac_dist[i] = np.sum(acd) 179 | for _g in range(X.shape[0]): 180 | cof_[_g] = (ac_dist[_g] * self.n_neighbors_) / np.sum(ac_dist[sbn_path_index[_g]]) 181 | return np.nan_to_num(cof_) 182 | 183 | def _cof_fast(self, X): 184 | """ 185 | Connectivity-Based Outlier Factor (COF) Algorithm 186 | This function is called internally to calculate the 187 | Connectivity-Based Outlier Factor (COF) as an outlier 188 | score for observations. 189 | This function uses a fast implementation at the cost of memory. 190 | :return: numpy array containing COF scores for observations. 191 | The greater the COF, the greater the outlierness. 192 | """ 193 | dist_matrix = np.array(distance_matrix(X, X)) 194 | sbn_path_index, ac_dist, cof_ = [], [], [] 195 | for i in range(X.shape[0]): 196 | sbn_path = np.argsort(dist_matrix[i]) 197 | sbn_path_index.append(sbn_path[1: self.n_neighbors_ + 1]) 198 | cost_desc = [] 199 | for j in range(self.n_neighbors_): 200 | cost_desc.append( 201 | np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) 202 | acd = [] 203 | for _h, cost_ in enumerate(cost_desc): 204 | neighbor_add1 = self.n_neighbors_ + 1 205 | acd.append(((2. * (neighbor_add1 - (_h + 1))) / ( 206 | neighbor_add1 * self.n_neighbors_)) * cost_) 207 | ac_dist.append(np.sum(acd)) 208 | for _g in range(X.shape[0]): 209 | cof_.append((ac_dist[_g] * self.n_neighbors_) / 210 | np.sum(itemgetter(*sbn_path_index[_g])(ac_dist))) 211 | return np.nan_to_num(cof_) -------------------------------------------------------------------------------- /TSB_AD/models/COPOD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [pyod] by [yzhao062] 3 | Original source: [https://github.com/yzhao062/pyod] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | import warnings 9 | 10 | import numpy as np 11 | 12 | from joblib import Parallel, delayed 13 | from scipy.stats import skew as skew_sp 14 | from sklearn.utils.validation import check_is_fitted 15 | from sklearn.utils import check_array 16 | 17 | from .base import BaseDetector 18 | from ..utils.stat_models import column_ecdf 19 | from ..utils.utility import _partition_estimators 20 | from ..utils.utility import zscore 21 | 22 | def skew(X, axis=0): 23 | return np.nan_to_num(skew_sp(X, axis=axis)) 24 | 25 | def _parallel_ecdf(n_dims, X): 26 | """Private method to calculate ecdf in parallel. 27 | Parameters 28 | ---------- 29 | n_dims : int 30 | The number of dimensions of the current input matrix 31 | 32 | X : numpy array 33 | The subarray for building the ECDF 34 | 35 | Returns 36 | ------- 37 | U_l_mat : numpy array 38 | ECDF subarray. 39 | 40 | U_r_mat : numpy array 41 | ECDF subarray. 42 | """ 43 | U_l_mat = np.zeros([X.shape[0], n_dims]) 44 | U_r_mat = np.zeros([X.shape[0], n_dims]) 45 | 46 | for i in range(n_dims): 47 | U_l_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1]) 48 | U_r_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1] * -1) 49 | return U_l_mat, U_r_mat 50 | 51 | class COPOD(BaseDetector): 52 | """COPOD class for Copula Based Outlier Detector. 53 | COPOD is a parameter-free, highly interpretable outlier detection algorithm 54 | based on empirical copula models. 55 | See :cite:`li2020copod` for details. 56 | 57 | Parameters 58 | ---------- 59 | contamination : float in (0., 0.5), optional (default=0.1) 60 | The amount of contamination of the data set, i.e. 
61 | the proportion of outliers in the data set. Used when fitting to 62 | define the threshold on the decision function. 63 | 64 | n_jobs : optional (default=1) 65 | The number of jobs to run in parallel for both `fit` and 66 | `predict`. If -1, then the number of jobs is set to the 67 | number of cores. 68 | 69 | Attributes 70 | ---------- 71 | decision_scores_ : numpy array of shape (n_samples,) 72 | The outlier scores of the training data. 73 | The higher, the more abnormal. Outliers tend to have higher 74 | scores. This value is available once the detector is 75 | fitted. 76 | threshold_ : float 77 | The threshold is based on ``contamination``. It is the 78 | ``n_samples * contamination`` most abnormal samples in 79 | ``decision_scores_``. The threshold is calculated for generating 80 | binary outlier labels. 81 | labels_ : int, either 0 or 1 82 | The binary labels of the training data. 0 stands for inliers 83 | and 1 for outliers/anomalies. It is generated by applying 84 | ``threshold_`` on ``decision_scores_``. 85 | """ 86 | 87 | def __init__(self, contamination=0.1, n_jobs=1, normalize=True): 88 | super(COPOD, self).__init__(contamination=contamination) 89 | 90 | #TODO: Make it parameterized for n_jobs 91 | self.n_jobs = n_jobs 92 | self.normalize = normalize 93 | 94 | def fit(self, X, y=None): 95 | """Fit detector. y is ignored in unsupervised methods. 96 | Parameters 97 | ---------- 98 | X : numpy array of shape (n_samples, n_features) 99 | The input samples. 100 | y : Ignored 101 | Not used, present for API consistency by convention. 102 | Returns 103 | ------- 104 | self : object 105 | Fitted estimator. 106 | """ 107 | X = check_array(X) 108 | if self.normalize: X = zscore(X, axis=1, ddof=1) 109 | 110 | self._set_n_classes(y) 111 | self.decision_scores_ = self.decision_function(X) 112 | self.X_train = X 113 | self._process_decision_scores() 114 | return self 115 | 116 | def decision_function(self, X): 117 | """Predict raw anomaly score of X using the fitted detector. 118 | For consistency, outliers are assigned with larger anomaly scores. 119 | Parameters 120 | ---------- 121 | X : numpy array of shape (n_samples, n_features) 122 | The training input samples. Sparse matrices are accepted only 123 | if they are supported by the base estimator. 124 | Returns 125 | ------- 126 | anomaly_scores : numpy array of shape (n_samples,) 127 | The anomaly score of the input samples. 128 | """ 129 | # use multi-thread execution 130 | if self.n_jobs != 1: 131 | return self._decision_function_parallel(X) 132 | if hasattr(self, 'X_train'): 133 | original_size = X.shape[0] 134 | X = np.concatenate((self.X_train, X), axis=0) 135 | self.U_l = -1 * np.log(column_ecdf(X)) 136 | self.U_r = -1 * np.log(column_ecdf(-X)) 137 | 138 | skewness = np.sign(skew(X, axis=0)) 139 | self.U_skew = self.U_l * -1 * np.sign( 140 | skewness - 1) + self.U_r * np.sign(skewness + 1) 141 | self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) 142 | if hasattr(self, 'X_train'): 143 | decision_scores_ = self.O.sum(axis=1)[-original_size:] 144 | else: 145 | decision_scores_ = self.O.sum(axis=1) 146 | return decision_scores_.ravel() 147 | 148 | def _decision_function_parallel(self, X): 149 | """Predict raw anomaly score of X using the fitted detector. 150 | For consistency, outliers are assigned with larger anomaly scores. 151 | Parameters 152 | ---------- 153 | X : numpy array of shape (n_samples, n_features) 154 | The training input samples. 
Sparse matrices are accepted only 155 | if they are supported by the base estimator. 156 | Returns 157 | ------- 158 | anomaly_scores : numpy array of shape (n_samples,) 159 | The anomaly score of the input samples. 160 | """ 161 | if hasattr(self, 'X_train'): 162 | original_size = X.shape[0] 163 | X = np.concatenate((self.X_train, X), axis=0) 164 | 165 | n_samples, n_features = X.shape[0], X.shape[1] 166 | 167 | if n_features < 2: 168 | raise ValueError( 169 | 'n_jobs should not be used on one dimensional dataset') 170 | 171 | if n_features <= self.n_jobs: 172 | self.n_jobs = n_features 173 | warnings.warn("n_features <= n_jobs; setting them equal instead.") 174 | 175 | n_jobs, n_dims_list, starts = _partition_estimators(n_features, 176 | self.n_jobs) 177 | 178 | all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, 179 | verbose=True)( 180 | delayed(_parallel_ecdf)( 181 | n_dims_list[i], 182 | X[:, starts[i]:starts[i + 1]], 183 | ) 184 | for i in range(n_jobs)) 185 | 186 | # recover the results 187 | self.U_l = np.zeros([n_samples, n_features]) 188 | self.U_r = np.zeros([n_samples, n_features]) 189 | 190 | for i in range(n_jobs): 191 | self.U_l[:, starts[i]:starts[i + 1]] = all_results[i][0] 192 | self.U_r[:, starts[i]:starts[i + 1]] = all_results[i][1] 193 | 194 | self.U_l = -1 * np.log(self.U_l) 195 | self.U_r = -1 * np.log(self.U_r) 196 | 197 | skewness = np.sign(skew(X, axis=0)) 198 | self.U_skew = self.U_l * -1 * np.sign( 199 | skewness - 1) + self.U_r * np.sign(skewness + 1) 200 | self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) 201 | if hasattr(self, 'X_train'): 202 | decision_scores_ = self.O.sum(axis=1)[-original_size:] 203 | else: 204 | decision_scores_ = self.O.sum(axis=1) 205 | return decision_scores_.ravel() -------------------------------------------------------------------------------- /TSB_AD/models/Chronos.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [chronos-forecasting] by [lostella et al.] 
3 | Original source: [https://github.com/amazon-science/chronos-forecasting] 4 | """ 5 | 6 | from autogluon.timeseries import TimeSeriesPredictor 7 | from sklearn.preprocessing import MinMaxScaler 8 | import numpy as np 9 | import pandas as pd 10 | import tempfile 11 | 12 | from .base import BaseDetector 13 | 14 | 15 | class Chronos(BaseDetector): 16 | def __init__(self, 17 | win_size=100, 18 | model_size = 'base', # [tiny, small, base] 19 | prediction_length=1, 20 | input_c=1, 21 | batch_size=128): 22 | 23 | self.model_name = 'Chronos' 24 | self.model_size = model_size 25 | self.win_size = win_size 26 | self.prediction_length = prediction_length 27 | self.input_c = input_c 28 | self.batch_size = batch_size 29 | self.score_list = [] 30 | 31 | def fit(self, data): 32 | 33 | for channel in range(self.input_c): 34 | 35 | data_channel = data[:, channel].reshape(-1, 1) 36 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) 37 | # print('data_win: ', data_win.shape) # (2330, 100) 38 | # print('data_target: ', data_target.shape) # (2330, 1) 39 | 40 | train_data = [] 41 | count = 0 42 | for id in range(data_win.shape[0]): 43 | for tt in range(data_win.shape[1]): 44 | train_data.append([id, count, data_win[id, tt]]) 45 | count += 1 46 | train_data = pd.DataFrame(train_data, columns=['item_id', 'timestamp', 'target']) 47 | 48 | with tempfile.TemporaryDirectory() as temp_dir: 49 | 50 | predictor = TimeSeriesPredictor(prediction_length=self.prediction_length, path=temp_dir).fit( 51 | train_data, 52 | hyperparameters={ 53 | "Chronos": { 54 | "model_path": self.model_size, # base 55 | "device": "cuda", 56 | "batch_size": self.batch_size}}, 57 | skip_model_selection=True, 58 | verbosity=0) 59 | 60 | predictions = predictor.predict(train_data)['mean'].to_numpy().reshape(-1, self.prediction_length) 61 | print('predictions: ', predictions.shape) 62 | 63 | ### using mse as the anomaly score 64 | scores = (data_target.squeeze() - predictions.squeeze()) ** 2 65 | self.score_list.append(scores) 66 | 67 | scores_merge = np.mean(np.array(self.score_list), axis=0) 68 | # print('scores_merge: ', scores_merge.shape) 69 | 70 | padded_decision_scores = np.zeros(len(data)) 71 | padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] 72 | padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge 73 | 74 | self.decision_scores_ = padded_decision_scores 75 | 76 | 77 | def decision_function(self, X): 78 | """ 79 | Not used, present for API consistency by convention. 
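In this wrapper the anomaly scores are computed inside ``fit`` (squared one-step forecast error per channel, averaged across channels) and exposed via ``decision_scores_``. A minimal usage sketch, assuming ``data`` is a 2-D array of shape (n_samples, n_features):

>>> detector = Chronos(win_size=100, model_size='base', input_c=data.shape[1])
>>> detector.fit(data)
>>> scores = detector.decision_scores_   # shape: (n_samples,)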
80 | """ 81 | pass 82 | 83 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 84 | Xs, ys = [], [] 85 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 86 | 87 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 88 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 89 | 90 | x = tmp[:slidingWindow] 91 | y = tmp[slidingWindow:] 92 | Xs.append(x) 93 | ys.append(y) 94 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/FFT.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | import numpy as np 7 | from dataclasses import dataclass 8 | from TSB_AD.models.base import BaseDetector 9 | from TSB_AD.utils.utility import zscore 10 | 11 | class FFT(BaseDetector): 12 | 13 | def __init__(self, ifft_parameters=5, local_neighbor_window=21, local_outlier_threshold=0.6, max_region_size=50, max_sign_change_distance=10, normalize=True): 14 | super().__init__() 15 | 16 | self.ifft_parameters = ifft_parameters 17 | self.local_neighbor_window = local_neighbor_window 18 | self.local_outlier_threshold = local_outlier_threshold 19 | self.max_region_size = max_region_size 20 | self.max_sign_change_distance = max_sign_change_distance 21 | self.normalize = normalize 22 | self.decision_scores_ = None 23 | 24 | def fit(self, X, y=None): 25 | """Fit detector. y is ignored in unsupervised methods.""" 26 | n_samples, n_features = X.shape 27 | if self.normalize: 28 | if n_features == 1: 29 | X = zscore(X, axis=0, ddof=0) 30 | else: 31 | X = zscore(X, axis=1, ddof=1) 32 | self.data = X 33 | self.decision_scores_ = self.detect_anomalies() 34 | return self 35 | 36 | def decision_function(self, X): 37 | """Predict raw anomaly score of X using the fitted detector.""" 38 | n_samples, n_features = X.shape 39 | decision_scores_ = np.zeros(n_samples) 40 | self.data = X 41 | local_outliers = self.calculate_local_outliers() 42 | if not local_outliers: 43 | print("No local outliers detected.") 44 | return np.zeros_like(self.data) 45 | 46 | regions = self.calculate_region_outliers(local_outliers) 47 | anomaly_scores = np.zeros_like(self.data) 48 | for region in regions: 49 | start_index = local_outliers[region.start_idx].index 50 | end_index = local_outliers[region.end_idx].index 51 | anomaly_scores[start_index:end_index + 1] = region.score 52 | 53 | decision_scores_ = anomaly_scores 54 | return decision_scores_ 55 | 56 | @staticmethod 57 | def reduce_parameters(f: np.ndarray, k: int) -> np.ndarray: 58 | transformed = f.copy() 59 | transformed[k:] = 0 60 | return transformed 61 | 62 | def calculate_local_outliers(self): 63 | n = len(self.data) 64 | k = max(min(self.ifft_parameters, n), 1) 65 | y = self.reduce_parameters(np.fft.fft(self.data), k) 66 | f2 = np.real(np.fft.ifft(y)) 67 | 68 | so = np.abs(f2 - self.data) 69 | mso = np.mean(so) 70 | neighbor_c = self.local_neighbor_window // 2 71 | 72 | scores = [] 73 | score_idxs = [] 74 | for i in range(n): 75 | if so[i] > mso: 76 | nav = np.mean(self.data[max(i - neighbor_c, 0):min(i + neighbor_c + 1, n)]) 77 | scores.append(self.data[i] - nav) 78 | score_idxs.append(i) 79 | 80 | if not scores: 81 | return [] 82 | 83 | ms = np.mean(scores) 84 | sds = np.std(scores) + 1e-6 85 | z_scores = (np.array(scores) - ms) / sds 86 | 87 | return 
[self.LocalOutlier(index=score_idxs[i], z_score=z_scores[i]) 88 | for i in range(len(scores)) if abs(z_scores[i]) > self.local_outlier_threshold] 89 | 90 | def calculate_region_outliers(self, local_outliers): 91 | def distance(a: int, b: int) -> int: 92 | return abs(local_outliers[b].index - local_outliers[a].index) 93 | 94 | regions = [] 95 | i = 0 96 | n_l = len(local_outliers) - 1 97 | while i < n_l: 98 | start_idx = i 99 | while i < n_l and distance(i, i + 1) <= self.max_sign_change_distance: 100 | i += 1 101 | end_idx = i 102 | if end_idx > start_idx: 103 | score = np.mean([abs(local_outliers[j].z_score) for j in range(start_idx, end_idx + 1)]) 104 | regions.append(self.RegionOutlier(start_idx=start_idx, end_idx=end_idx, score=score)) 105 | i += 1 106 | 107 | return regions 108 | 109 | @dataclass 110 | class LocalOutlier: 111 | index: int 112 | z_score: float 113 | 114 | @property 115 | def sign(self) -> int: 116 | return np.sign(self.z_score) 117 | 118 | @dataclass 119 | class RegionOutlier: 120 | start_idx: int 121 | end_idx: int 122 | score: float 123 | 124 | def detect_anomalies(self): 125 | """Detect anomalies by combining local and regional outliers.""" 126 | local_outliers = self.calculate_local_outliers() 127 | if not local_outliers: 128 | print("No local outliers detected.") 129 | return np.zeros_like(self.data) 130 | 131 | regions = self.calculate_region_outliers(local_outliers) 132 | anomaly_scores = np.zeros_like(self.data) 133 | for region in regions: 134 | start_index = local_outliers[region.start_idx].index 135 | end_index = local_outliers[region.end_idx].index 136 | anomaly_scores[start_index:end_index + 1] = region.score 137 | 138 | return anomaly_scores -------------------------------------------------------------------------------- /TSB_AD/models/FITS.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [FITS] by [VEWOXIC] 3 | Original source: [https://github.com/VEWOXIC/FITS] 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | from typing import Dict 11 | import torchinfo 12 | import tqdm 13 | import numpy as np 14 | import torch 15 | from torch import nn, optim 16 | from torch.utils.data import DataLoader 17 | import math 18 | 19 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 20 | from ..utils.dataset import ReconstructDataset 21 | 22 | class Model(nn.Module): 23 | 24 | # FITS: Frequency Interpolation Time Series Forecasting 25 | 26 | def __init__(self, seq_len, pred_len, individual, enc_in, cut_freq): 27 | super(Model, self).__init__() 28 | self.seq_len = seq_len 29 | self.pred_len = pred_len 30 | self.individual = individual 31 | self.channels = enc_in 32 | 33 | self.dominance_freq = cut_freq # 720/24 34 | self.length_ratio = (self.seq_len + self.pred_len)/self.seq_len 35 | 36 | if self.individual: 37 | self.freq_upsampler = nn.ModuleList() 38 | for i in range(self.channels): 39 | self.freq_upsampler.append(nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat)) 40 | 41 | else: 42 | self.freq_upsampler = nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat) # complex layer for frequency upcampling] 43 | # configs.pred_len=configs.seq_len+configs.pred_len 44 | # #self.Dlinear=DLinear.Model(configs) 45 | # configs.pred_len=self.pred_len 46 | 47 | 48 | def forward(self, x): 49 | # RIN 50 | x_mean = torch.mean(x, dim=1, keepdim=True) 51 
| x = x - x_mean 52 | x_var=torch.var(x, dim=1, keepdim=True)+ 1e-5 53 | # print(x_var) 54 | x = x / torch.sqrt(x_var) 55 | 56 | low_specx = torch.fft.rfft(x, dim=1) 57 | low_specx[:,self.dominance_freq:]=0 # LPF 58 | low_specx = low_specx[:,0:self.dominance_freq,:] # LPF 59 | # print(low_specx.permute(0,2,1)) 60 | if self.individual: 61 | low_specxy_ = torch.zeros([low_specx.size(0),int(self.dominance_freq*self.length_ratio),low_specx.size(2)],dtype=low_specx.dtype).to(low_specx.device) 62 | for i in range(self.channels): 63 | low_specxy_[:,:,i]=self.freq_upsampler[i](low_specx[:,:,i].permute(0,1)).permute(0,1) 64 | else: 65 | low_specxy_ = self.freq_upsampler(low_specx.permute(0,2,1)).permute(0,2,1) 66 | # print(low_specxy_) 67 | low_specxy = torch.zeros([low_specxy_.size(0),int((self.seq_len+self.pred_len)/2+1),low_specxy_.size(2)],dtype=low_specxy_.dtype).to(low_specxy_.device) 68 | low_specxy[:,0:low_specxy_.size(1),:]=low_specxy_ # zero padding 69 | low_xy=torch.fft.irfft(low_specxy, dim=1) 70 | low_xy=low_xy * self.length_ratio # energy compemsation for the length change 71 | # dom_x=x-low_x 72 | 73 | # dom_xy=self.Dlinear(dom_x) 74 | # xy=(low_xy+dom_xy) * torch.sqrt(x_var) +x_mean # REVERSE RIN 75 | xy=(low_xy) * torch.sqrt(x_var) +x_mean 76 | return xy, low_xy* torch.sqrt(x_var) 77 | 78 | 79 | class FITS(): 80 | def __init__(self, 81 | win_size=100, 82 | DSR=4, 83 | individual=True, 84 | input_c=1, 85 | batch_size=128, 86 | cut_freq=12, 87 | epochs=50, 88 | lr=1e-3, 89 | validation_size=0.2 90 | ): 91 | super().__init__() 92 | self.__anomaly_score = None 93 | 94 | self.cuda = True 95 | self.device = get_gpu(self.cuda) 96 | 97 | 98 | self.win_size = win_size 99 | self.DSR = DSR 100 | self.individual = individual 101 | self.input_c = input_c 102 | self.batch_size = batch_size 103 | self.cut_freq = cut_freq 104 | self.validation_size = validation_size 105 | 106 | self.model = Model(seq_len=self.win_size//self.DSR, pred_len=self.win_size-self.win_size//self.DSR, individual=self.individual, enc_in=self.input_c, cut_freq=self.cut_freq).to(self.device) 107 | 108 | self.epochs = epochs 109 | self.learning_rate = lr 110 | self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) 111 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) 112 | self.loss = nn.MSELoss() 113 | self.anomaly_criterion = nn.MSELoss(reduce=False) 114 | 115 | self.save_path = None 116 | self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) 117 | 118 | def fit(self, data): 119 | 120 | tsTrain = data[:int((1-self.validation_size)*len(data))] 121 | tsValid = data[int((1-self.validation_size)*len(data)):] 122 | 123 | train_loader = DataLoader( 124 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 125 | batch_size=self.batch_size, 126 | shuffle=True 127 | ) 128 | 129 | valid_loader = DataLoader( 130 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 131 | batch_size=self.batch_size, 132 | shuffle=False 133 | ) 134 | 135 | for epoch in range(1, self.epochs + 1): 136 | self.model.train(mode=True) 137 | avg_loss = 0 138 | loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) 139 | for idx, (x, target) in loop: 140 | 141 | x = x[:, ::self.DSR, :] 142 | x, target = x.to(self.device), target.to(self.device) 143 | self.optimizer.zero_grad() 144 | 145 | output, _ = self.model(x) 146 | 147 | # print('x: ', x.shape) 148 | # print('target: ', target.shape) 149 | 150 | loss = self.loss(output, target) 151 
| loss.backward() 152 | 153 | self.optimizer.step() 154 | 155 | avg_loss += loss.cpu().item() 156 | loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') 157 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) 158 | 159 | 160 | self.model.eval() 161 | avg_loss = 0 162 | loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) 163 | with torch.no_grad(): 164 | for idx, (x, target) in loop: 165 | 166 | x = x[:, ::self.DSR, :] 167 | x, target = x.to(self.device), target.to(self.device) 168 | output, _ = self.model(x) 169 | loss = self.loss(output, target) 170 | avg_loss += loss.cpu().item() 171 | loop.set_description(f'Validation Epoch [{epoch}/{self.epochs}]') 172 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) 173 | 174 | valid_loss = avg_loss/max(len(valid_loader), 1) 175 | self.scheduler.step() 176 | 177 | self.early_stopping(valid_loss, self.model) 178 | if self.early_stopping.early_stop: 179 | print(" Early stopping<<<") 180 | break 181 | 182 | def decision_function(self, data): 183 | test_loader = DataLoader( 184 | dataset=ReconstructDataset(data, window_size=self.win_size), 185 | batch_size=self.batch_size, 186 | shuffle=False 187 | ) 188 | 189 | self.model.eval() 190 | scores = [] 191 | loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) 192 | with torch.no_grad(): 193 | for idx, (x, target) in loop: 194 | 195 | x = x[:, ::self.DSR, :] 196 | x, target = x.to(self.device), target.to(self.device) 197 | output, _ = self.model(x) 198 | # loss = self.loss(output, target) 199 | score = torch.mean(self.anomaly_criterion(output, target), dim=-1) 200 | scores.append(score.cpu()[:,-1]) 201 | 202 | loop.set_description(f'Testing: ') 203 | 204 | scores = torch.cat(scores, dim=0) 205 | scores = scores.numpy().flatten() 206 | 207 | assert scores.ndim == 1 208 | self.__anomaly_score = scores 209 | 210 | if self.__anomaly_score.shape[0] < len(data): 211 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 212 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 213 | 214 | return self.__anomaly_score 215 | 216 | 217 | def anomaly_score(self) -> np.ndarray: 218 | return self.__anomaly_score 219 | 220 | def param_statistic(self, save_file): 221 | model_stats = torchinfo.summary(self.model, (self.batch_size, self.input_len), verbose=0) 222 | with open(save_file, 'w') as f: 223 | f.write(str(model_stats)) -------------------------------------------------------------------------------- /TSB_AD/models/KMeansAD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | from sklearn.base import BaseEstimator, OutlierMixin 7 | from sklearn.cluster import KMeans 8 | import numpy as np 9 | from numpy.lib.stride_tricks import sliding_window_view 10 | from ..utils.utility import zscore 11 | 12 | class KMeansAD(BaseEstimator, OutlierMixin): 13 | def __init__(self, k, window_size, stride, n_jobs=1, normalize=True): 14 | self.k = k 15 | self.window_size = window_size 16 | self.stride = stride 17 | self.model = KMeans(n_clusters=k) 18 | self.padding_length = 0 19 | self.normalize = normalize 20 | 21 | def _preprocess_data(self, X: np.ndarray) -> np.ndarray: 22 | flat_shape = (X.shape[0] - (self.window_size - 1), -1) # in case we have a multivariate TS 23 | slides = 
sliding_window_view(X, window_shape=self.window_size, axis=0).reshape(flat_shape)[::self.stride, :] 24 | self.padding_length = X.shape[0] - (slides.shape[0] * self.stride + self.window_size - self.stride) 25 | print(f"Required padding_length={self.padding_length}") 26 | if self.normalize: slides = zscore(slides, axis=1, ddof=1) 27 | return slides 28 | 29 | def _custom_reverse_windowing(self, scores: np.ndarray) -> np.ndarray: 30 | print("Reversing window-based scores to point-based scores:") 31 | print(f"Before reverse-windowing: scores.shape={scores.shape}") 32 | # compute begin and end indices of windows 33 | begins = np.array([i * self.stride for i in range(scores.shape[0])]) 34 | ends = begins + self.window_size 35 | 36 | # prepare target array 37 | unwindowed_length = self.stride * (scores.shape[0] - 1) + self.window_size + self.padding_length 38 | mapped = np.full(unwindowed_length, fill_value=np.nan) 39 | 40 | # only iterate over window intersections 41 | indices = np.unique(np.r_[begins, ends]) 42 | for i, j in zip(indices[:-1], indices[1:]): 43 | window_indices = np.flatnonzero((begins <= i) & (j-1 < ends)) 44 | # print(i, j, window_indices) 45 | mapped[i:j] = np.nanmean(scores[window_indices]) 46 | 47 | # replace untouched indices with 0 (especially for the padding at the end) 48 | np.nan_to_num(mapped, copy=False) 49 | print(f"After reverse-windowing: scores.shape={mapped.shape}") 50 | return mapped 51 | 52 | def fit(self, X: np.ndarray, y=None, preprocess=True) -> 'KMeansAD': 53 | if preprocess: 54 | X = self._preprocess_data(X) 55 | self.model.fit(X) 56 | return self 57 | 58 | def predict(self, X: np.ndarray, preprocess=True) -> np.ndarray: 59 | if preprocess: 60 | X = self._preprocess_data(X) 61 | clusters = self.model.predict(X) 62 | diffs = np.linalg.norm(X - self.model.cluster_centers_[clusters], axis=1) 63 | return self._custom_reverse_windowing(diffs) 64 | 65 | def fit_predict(self, X, y=None) -> np.ndarray: 66 | X = self._preprocess_data(X) 67 | self.fit(X, y, preprocess=False) 68 | return self.predict(X, preprocess=False) -------------------------------------------------------------------------------- /TSB_AD/models/Lag_Llama.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [lag-llama] by [ashok-arjun&kashif] 3 | Original source: [https://github.com/time-series-foundation-models/lag-llama] 4 | """ 5 | 6 | from itertools import islice 7 | 8 | from matplotlib import pyplot as plt 9 | import matplotlib.dates as mdates 10 | 11 | import torch 12 | from gluonts.evaluation import make_evaluation_predictions 13 | from gluonts.dataset.pandas import PandasDataset 14 | import pandas as pd 15 | import numpy as np 16 | from ..utils.torch_utility import get_gpu 17 | 18 | from lag_llama.gluon.estimator import LagLlamaEstimator 19 | 20 | class Lag_Llama(): 21 | def __init__(self, 22 | win_size=96, 23 | prediction_length=1, 24 | input_c=1, 25 | use_rope_scaling=False, 26 | batch_size=64, 27 | num_samples=100, 28 | ckpt_path='lag-llama.ckpt'): 29 | 30 | self.model_name = 'Lag_Llama' 31 | self.context_length = win_size 32 | self.prediction_length = prediction_length 33 | self.input_c = input_c 34 | self.ckpt_path = ckpt_path 35 | self.use_rope_scaling = use_rope_scaling 36 | self.batch_size = batch_size 37 | self.num_samples = num_samples 38 | self.score_list = [] 39 | 40 | self.cuda = True 41 | self.device = get_gpu(self.cuda) 42 | 43 | 44 | def fit(self, data): 45 | 46 | for channel in range(self.input_c): 47 
| 48 | data_channel = data[:, channel].reshape(-1, 1) 49 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.context_length, predict_time_steps=self.prediction_length) 50 | # print('data_win: ', data_win.shape) # (2330, 100) 51 | # print('data_target: ', data_target.shape) # (2330, 1) 52 | 53 | data_win = data_win.T 54 | 55 | date_rng = pd.date_range(start='2021-01-01', periods=data_win.shape[0], freq='H') # Dummy timestep 56 | df_wide = pd.DataFrame(data_win, index=date_rng) 57 | # Convert numerical columns to float 32 format for lag-llama 58 | for col in df_wide.columns: 59 | # Check if column is not of string type 60 | if df_wide[col].dtype != 'object' and pd.api.types.is_string_dtype(df_wide[col]) == False: 61 | df_wide[col] = df_wide[col].astype('float32') 62 | 63 | # Create a PandasDataset 64 | ds = PandasDataset(dict(df_wide)) 65 | 66 | ckpt = torch.load(self.ckpt_path, map_location=self.device) # Uses GPU since in this Colab we use a GPU. 67 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 68 | 69 | rope_scaling_arguments = { 70 | "type": "linear", 71 | "factor": max(1.0, (self.context_length + self.prediction_length) / estimator_args["context_length"]), 72 | } 73 | 74 | estimator = LagLlamaEstimator( 75 | ckpt_path=self.ckpt_path, 76 | prediction_length=self.prediction_length, 77 | context_length=self.context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length 78 | 79 | # estimator args 80 | input_size=estimator_args["input_size"], 81 | n_layer=estimator_args["n_layer"], 82 | n_embd_per_head=estimator_args["n_embd_per_head"], 83 | n_head=estimator_args["n_head"], 84 | scaling=estimator_args["scaling"], 85 | time_feat=estimator_args["time_feat"], 86 | rope_scaling=rope_scaling_arguments if self.use_rope_scaling else None, 87 | 88 | batch_size=self.batch_size, 89 | num_parallel_samples=100, 90 | device=self.device, 91 | ) 92 | 93 | lightning_module = estimator.create_lightning_module() 94 | transformation = estimator.create_transformation() 95 | predictor = estimator.create_predictor(transformation, lightning_module) 96 | 97 | forecast_it, ts_it = make_evaluation_predictions( 98 | dataset=ds, 99 | predictor=predictor, 100 | num_samples=self.num_samples 101 | ) 102 | forecasts = list(forecast_it) 103 | tss = list(ts_it) 104 | 105 | predictions = np.array([pred.mean for pred in forecasts]) 106 | 107 | # print('predictions: ', predictions.shape) 108 | 109 | ### using mse as the anomaly score 110 | scores = (data_target.squeeze() - predictions.squeeze()) ** 2 111 | self.score_list.append(scores) 112 | 113 | scores_merge = np.mean(np.array(self.score_list), axis=0) 114 | 115 | padded_decision_scores = np.zeros(len(data)) 116 | padded_decision_scores[: self.context_length+self.prediction_length-1] = scores_merge[0] 117 | padded_decision_scores[self.context_length+self.prediction_length-1 : ]=scores_merge 118 | 119 | self.decision_scores_ = padded_decision_scores 120 | 121 | 122 | def decision_function(self, X): 123 | """ 124 | Not used, present for API consistency by convention. 
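As with the other zero-shot forecasting wrappers, the scores are produced during ``fit`` (per-channel squared forecast error, averaged across channels) and stored in ``decision_scores_``. Sketch, assuming the Lag-Llama checkpoint has already been downloaded to the path passed as ``ckpt_path``:

>>> detector = Lag_Llama(win_size=96, input_c=data.shape[1], ckpt_path='lag-llama.ckpt')
>>> detector.fit(data)
>>> scores = detector.decision_scores_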
125 | """ 126 | pass 127 | 128 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 129 | Xs, ys = [], [] 130 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 131 | 132 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 133 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 134 | 135 | x = tmp[:slidingWindow] 136 | y = tmp[slidingWindow:] 137 | Xs.append(x) 138 | ys.append(y) 139 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/Left_STAMPi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | import math 4 | from stumpy import stumpi 5 | from TSB_AD.models.base import BaseDetector 6 | from TSB_AD.utils.utility import zscore 7 | 8 | class Left_STAMPi(BaseDetector): 9 | 10 | def __init__(self, n_init_train=100, window_size=50, normalize=True): 11 | super().__init__() 12 | self.n_init_train = n_init_train 13 | self.window_size = window_size 14 | self.normalize = normalize 15 | 16 | def fit(self, X, y=None): 17 | """Fit detector. y is ignored in unsupervised methods. 18 | 19 | Parameters 20 | ---------- 21 | X : numpy array of shape (n_samples, n_features) 22 | The input samples. 23 | 24 | y : Ignored 25 | Not used, present for API consistency by convention. 26 | 27 | Returns 28 | ------- 29 | self : object 30 | Fitted estimator. 31 | """ 32 | n_samples, n_features = X.shape 33 | if self.normalize: 34 | X = zscore(X, axis=0, ddof=0) 35 | 36 | warmup = self.n_init_train 37 | ws = self.window_size 38 | 39 | if ws > warmup: 40 | logging.warning(f"WARN: window_size is larger than n_init_train. Adjusting to n_init_train={warmup}.") 41 | ws = warmup 42 | if ws < 3: 43 | logging.warning("WARN: window_size must be at least 3. Adjusting to 3.") 44 | ws = 3 45 | 46 | self.stream = stumpi(X[:warmup, 0], m=ws, egress=False) 47 | for point in X[warmup:, 0]: 48 | self.stream.update(point) 49 | 50 | self.decision_scores_ = self.stream.left_P_ 51 | self.decision_scores_[:warmup] = 0 52 | 53 | return self 54 | 55 | def decision_function(self, X): 56 | """Predict raw anomaly score of X using the fitted detector. 57 | 58 | Parameters 59 | ---------- 60 | X : numpy array of shape (n_samples, n_features) 61 | The training input samples. 62 | 63 | Returns 64 | ------- 65 | anomaly_scores : numpy array of shape (n_samples,) 66 | The anomaly score of the input samples. 67 | """ 68 | n_samples = X.shape[0] 69 | padded_scores = self.pad_anomaly_scores(self.decision_scores_, n_samples, self.window_size) 70 | return padded_scores 71 | 72 | @staticmethod 73 | def pad_anomaly_scores(scores, n_samples, window_size): 74 | """ 75 | Pads the anomaly scores to match the length of the input time series. 76 | Padding is symmetric, using the first and last values. 
77 | """ 78 | left_padding = [scores[0]] * math.ceil((window_size - 1) / 2) 79 | right_padding = [scores[-1]] * ((window_size - 1) // 2) 80 | padded_scores = np.array(left_padding + list(scores) + right_padding) 81 | 82 | return padded_scores[:n_samples] -------------------------------------------------------------------------------- /TSB_AD/models/MOMENT.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [moment] by [mononitogoswami] 3 | Original source: [https://github.com/moment-timeseries-foundation-model/moment] 4 | """ 5 | 6 | from momentfm import MOMENTPipeline 7 | from momentfm.utils.masking import Masking 8 | from sklearn.preprocessing import MinMaxScaler 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | from torch.utils.data import DataLoader 13 | from tqdm import tqdm 14 | from torch import nn 15 | import math 16 | 17 | from .base import BaseDetector 18 | from ..utils.dataset import ReconstructDataset_Moment 19 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 20 | 21 | class MOMENT(BaseDetector): 22 | def __init__(self, 23 | win_size=256, 24 | input_c=1, 25 | batch_size=128, 26 | epochs=2, 27 | validation_size=0, 28 | lr=1e-4): 29 | 30 | self.model_name = 'MOMENT' 31 | self.win_size = win_size 32 | self.input_c = input_c 33 | self.batch_size = batch_size 34 | self.anomaly_criterion = nn.MSELoss(reduce=False) 35 | self.epochs = epochs 36 | self.validation_size = validation_size 37 | self.lr = lr 38 | 39 | cuda = True 40 | self.cuda = cuda 41 | self.device = get_gpu(self.cuda) 42 | 43 | 44 | self.model = MOMENTPipeline.from_pretrained( 45 | "AutonLab/MOMENT-1-base", 46 | model_kwargs={"task_name": "reconstruction"}, # For anomaly detection, we will load MOMENT in `reconstruction` mode 47 | ) 48 | self.model.init() 49 | self.model = self.model.to("cuda").float() 50 | # Optimize Mean Squarred Error using your favourite optimizer 51 | self.criterion = torch.nn.MSELoss() 52 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 53 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) 54 | self.save_path = None 55 | self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) 56 | 57 | def zero_shot(self, data): 58 | 59 | test_loader = DataLoader( 60 | dataset=ReconstructDataset_Moment(data, window_size=self.win_size), 61 | batch_size=self.batch_size, 62 | shuffle=False) 63 | 64 | trues, preds = [], [] 65 | self.score_list = [] 66 | with torch.no_grad(): 67 | for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): 68 | batch_x = batch_x.to("cuda").float() 69 | batch_masks = batch_masks.to("cuda") 70 | batch_x = batch_x.permute(0,2,1) 71 | 72 | # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] 73 | # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] 74 | 75 | output = self.model(x_enc=batch_x, input_mask=batch_masks) # [batch_size, n_channels, window_size] 76 | score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] 77 | self.score_list.append(score) 78 | 79 | self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) 80 | 81 | if self.__anomaly_score.shape[0] < len(data): 82 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 83 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 84 | 
self.decision_scores_ = self.__anomaly_score 85 | 86 | 87 | def fit(self, data): 88 | tsTrain = data[:int((1-self.validation_size)*len(data))] 89 | tsValid = data[int((1-self.validation_size)*len(data)):] 90 | 91 | train_loader = DataLoader( 92 | dataset=ReconstructDataset_Moment(tsTrain, window_size=self.win_size), 93 | batch_size=self.batch_size, 94 | shuffle=True 95 | ) 96 | 97 | valid_loader = DataLoader( 98 | dataset=ReconstructDataset_Moment(tsValid, window_size=self.win_size), 99 | batch_size=self.batch_size, 100 | shuffle=False 101 | ) 102 | 103 | mask_generator = Masking(mask_ratio=0.3) # Mask 30% of patches randomly 104 | 105 | 106 | for epoch in range(1, self.epochs + 1): 107 | self.model.train() 108 | for batch_x, batch_masks in tqdm(train_loader, total=len(train_loader)): 109 | batch_x = batch_x.to(self.device).float() 110 | batch_x = batch_x.permute(0,2,1) 111 | # print('batch_x: ', batch_x.shape) 112 | 113 | original = batch_x 114 | n_channels = batch_x.shape[1] 115 | 116 | # Reshape to [batch_size * n_channels, 1, window_size] 117 | batch_x = batch_x.reshape((-1, 1, self.win_size)) 118 | 119 | batch_masks = batch_masks.to(self.device).long() 120 | batch_masks = batch_masks.repeat_interleave(n_channels, axis=0) 121 | 122 | # Randomly mask some patches of data 123 | mask = mask_generator.generate_mask( 124 | x=batch_x, input_mask=batch_masks).to(self.device).long() 125 | 126 | mask = torch.nn.functional.pad(mask, (0, batch_masks.size(1) - mask.size(1)), mode='constant', value=1) 127 | 128 | # Forward 129 | model_output = self.model(batch_x, input_mask=batch_masks, mask=mask).reconstruction 130 | model_output = torch.nn.functional.pad(model_output, (0, original.size(2)-model_output.size(2)), mode='replicate') 131 | 132 | output = model_output.reshape(original.size(0), n_channels, self.win_size) 133 | 134 | # Compute loss 135 | loss = self.criterion(output, original) 136 | 137 | # print(f"loss: {loss.item()}") 138 | 139 | # Backward 140 | self.optimizer.zero_grad() 141 | loss.backward() 142 | self.optimizer.step() 143 | 144 | # self.model.eval() 145 | # avg_loss = 0 146 | # with torch.no_grad(): 147 | # for batch_x, batch_masks in tqdm(valid_loader, total=len(valid_loader)): 148 | # batch_x = batch_x.to("cuda").float() 149 | # batch_masks = batch_masks.to("cuda") 150 | # batch_x = batch_x.permute(0,2,1) 151 | 152 | # print('batch_x: ', batch_x.shape) 153 | # print('batch_masks: ', batch_masks.shape) 154 | 155 | # output = self.model(batch_x, input_mask=batch_masks) 156 | 157 | # loss = self.criterion(output.reconstruction.reshape(-1, n_channels, self.win_size), batch_x) 158 | # print(f"loss: {loss.item()}") 159 | # avg_loss += loss.cpu().item() 160 | 161 | # valid_loss = avg_loss/max(len(valid_loader), 1) 162 | # self.scheduler.step() 163 | # self.early_stopping(valid_loss, self.model) 164 | # if self.early_stopping.early_stop: 165 | # print(" Early stopping<<<") 166 | # break 167 | 168 | def decision_function(self, data): 169 | """ 170 | Not used, present for API consistency by convention. 
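Despite the boilerplate sentence above, in this class the method does compute scores: it slides windows over ``data``, reconstructs them with the (optionally fine-tuned) MOMENT model, reduces the reconstruction MSE to one score per window, and pads the result to ``len(data)``. Sketch, assuming a 2-D ``data`` array:

>>> detector = MOMENT(win_size=256, input_c=data.shape[1])
>>> detector.zero_shot(data)               # zero-shot scoring fills decision_scores_
>>> scores = detector.decision_scores_
>>> # or fine-tune first: detector.fit(data); scores = detector.decision_function(data)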
171 | """ 172 | 173 | test_loader = DataLoader( 174 | dataset=ReconstructDataset_Moment(data, window_size=self.win_size), 175 | batch_size=self.batch_size, 176 | shuffle=False) 177 | 178 | trues, preds = [], [] 179 | self.score_list = [] 180 | with torch.no_grad(): 181 | for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): 182 | batch_x = batch_x.to("cuda").float() 183 | batch_masks = batch_masks.to("cuda") 184 | batch_x = batch_x.permute(0,2,1) 185 | 186 | # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] 187 | # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] 188 | 189 | output = self.model(batch_x, input_mask=batch_masks) 190 | score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] 191 | self.score_list.append(score) 192 | 193 | self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) 194 | 195 | if self.__anomaly_score.shape[0] < len(data): 196 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 197 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 198 | 199 | return self.__anomaly_score -------------------------------------------------------------------------------- /TSB_AD/models/MatrixProfile.py: -------------------------------------------------------------------------------- 1 | import stumpy 2 | import numpy as np 3 | 4 | class MatrixProfile(): 5 | """ 6 | Wrapper of the stympy implementation of the MatrixProfile algorithm 7 | 8 | Parameters 9 | ---------- 10 | window : int, 11 | target subsequence length. 12 | 13 | Attributes 14 | ---------- 15 | decision_scores_ : numpy array of shape (n_samples - m,) 16 | The anomaly score. 17 | The higher, the more abnormal. Anomalies tend to have higher 18 | scores. This value is available once the detector is 19 | fitted. 20 | """ 21 | 22 | def __init__(self, window): 23 | self.window = window 24 | self.model_name = 'MatrixProfile' 25 | 26 | def fit(self, X, y=None): 27 | """Fit detector. y is ignored in unsupervised methods. 28 | 29 | Parameters 30 | ---------- 31 | X : numpy array of shape (n_samples, ) 32 | The input samples. 33 | y : Ignored 34 | Not used, present for API consistency by convention. 35 | 36 | Returns 37 | ------- 38 | self : object 39 | Fitted estimator. 40 | """ 41 | self.profile = stumpy.stump(X.ravel(),m=self.window) 42 | #self.profile = mp.compute(X, windows=self.window) 43 | res = np.zeros(len(X)) 44 | res.fill(self.profile[:, 0].min()) 45 | res[self.window//2:-self.window//2+1] = self.profile[:, 0] 46 | self.decision_scores_ = res 47 | return self 48 | -------------------------------------------------------------------------------- /TSB_AD/models/NormA.txt: -------------------------------------------------------------------------------- 1 | # Algorithms protected by patent. Code protected by copyright and provided 2 | # as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from 3 | # the academia may use this code only for academic research purposes, 4 | # provided that the authors are properly acknowledged using the citations 5 | # below. Users from the industry may test and evaluate this code by 6 | # requesting a license. 
-------------------------------------------------------------------------------- /TSB_AD/models/OmniAnomaly.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [OmniAnomaly] by [TsingHuasuya et al.] 3 | Original source: [https://github.com/NetManAIOps/OmniAnomaly] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import math 11 | import torch 12 | import torch.nn.functional as F 13 | from sklearn.utils import check_array 14 | from sklearn.utils.validation import check_is_fitted 15 | from torch import nn 16 | from torch.utils.data import DataLoader 17 | from sklearn.preprocessing import MinMaxScaler 18 | import tqdm 19 | 20 | from .base import BaseDetector 21 | from ..utils.dataset import ReconstructDataset 22 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 23 | 24 | class OmniAnomalyModel(nn.Module): 25 | def __init__(self, feats, device): 26 | super(OmniAnomalyModel, self).__init__() 27 | self.name = 'OmniAnomaly' 28 | self.device = device 29 | self.lr = 0.002 30 | self.beta = 0.01 31 | self.n_feats = feats 32 | self.n_hidden = 32 33 | self.n_latent = 8 34 | self.lstm = nn.GRU(feats, self.n_hidden, 2) 35 | self.encoder = nn.Sequential( 36 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 37 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 38 | # nn.Flatten(), 39 | nn.Linear(self.n_hidden, 2*self.n_latent) 40 | ) 41 | self.decoder = nn.Sequential( 42 | nn.Linear(self.n_latent, self.n_hidden), nn.PReLU(), 43 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 44 | nn.Linear(self.n_hidden, self.n_feats), nn.Sigmoid(), 45 | ) 46 | 47 | def forward(self, x, hidden = None): 48 | bs = x.shape[0] 49 | win = x.shape[1] 50 | 51 | # hidden = torch.rand(2, bs, self.n_hidden, dtype=torch.float64) if hidden is not None else hidden 52 | hidden = torch.rand(2, bs, self.n_hidden).to(self.device) if hidden is not None else hidden 53 | 54 | out, hidden = self.lstm(x.view(-1, bs, self.n_feats), hidden) 55 | 56 | # print('out: ', out.shape) # (L, bs, n_hidden) 57 | # print('hidden: ', hidden.shape) # (2, bs, n_hidden) 58 | 59 | ## Encode 60 | x = self.encoder(out) 61 | mu, logvar = torch.split(x, [self.n_latent, self.n_latent], dim=-1) 62 | ## Reparameterization trick 63 | std = torch.exp(0.5*logvar) 64 | eps = torch.randn_like(std) 65 | x = mu + eps*std 66 | ## Decoder 67 | x = self.decoder(x) # (L, bs, n_feats) 68 | return x.reshape(bs, win*self.n_feats), mu.reshape(bs, win*self.n_latent), logvar.reshape(bs, win*self.n_latent), hidden 69 | 70 | 71 | class OmniAnomaly(BaseDetector): 72 | def __init__(self, 73 | win_size = 5, 74 | feats = 1, 75 | batch_size = 128, 76 | epochs = 50, 77 | patience = 3, 78 | lr = 0.002, 79 | validation_size=0.2 80 | ): 81 | super().__init__() 82 | 83 | self.__anomaly_score = None 84 | 85 | self.cuda = True 86 | self.device = get_gpu(self.cuda) 87 | 88 | self.win_size = win_size 89 | self.batch_size = batch_size 90 | self.epochs = epochs 91 | self.feats = feats 92 | self.validation_size = validation_size 93 | 94 | self.model = OmniAnomalyModel(feats=self.feats, device=self.device).to(self.device) 95 | self.optimizer = torch.optim.AdamW( 96 | self.model.parameters(), lr=lr, weight_decay=1e-5 97 | ) 98 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) 99 | self.criterion = nn.MSELoss(reduction = 'none') 100 | 101 | self.early_stopping = EarlyStoppingTorch(None, patience=patience) 102 
| 103 | def fit(self, data): 104 | tsTrain = data[:int((1-self.validation_size)*len(data))] 105 | tsValid = data[int((1-self.validation_size)*len(data)):] 106 | 107 | train_loader = DataLoader( 108 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 109 | batch_size=self.batch_size, 110 | shuffle=True 111 | ) 112 | 113 | valid_loader = DataLoader( 114 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 115 | batch_size=self.batch_size, 116 | shuffle=False 117 | ) 118 | 119 | mses, klds = [], [] 120 | for epoch in range(1, self.epochs + 1): 121 | self.model.train(mode=True) 122 | n = epoch + 1 123 | avg_loss = 0 124 | loop = tqdm.tqdm( 125 | enumerate(train_loader), total=len(train_loader), leave=True 126 | ) 127 | for idx, (d, _) in loop: 128 | d = d.to(self.device) 129 | # print('d: ', d.shape) 130 | 131 | y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) 132 | d = d.view(-1, self.feats*self.win_size) 133 | MSE = torch.mean(self.criterion(y_pred, d), axis=-1) 134 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) 135 | loss = torch.mean(MSE + self.model.beta * KLD) 136 | 137 | mses.append(torch.mean(MSE).item()) 138 | klds.append(self.model.beta * torch.mean(KLD).item()) 139 | self.optimizer.zero_grad() 140 | loss.backward() 141 | self.optimizer.step() 142 | 143 | avg_loss += loss.cpu().item() 144 | loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") 145 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) 146 | 147 | if len(valid_loader) > 0: 148 | self.model.eval() 149 | avg_loss_val = 0 150 | loop = tqdm.tqdm( 151 | enumerate(valid_loader), total=len(valid_loader), leave=True 152 | ) 153 | with torch.no_grad(): 154 | for idx, (d, _) in loop: 155 | d = d.to(self.device) 156 | y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) 157 | d = d.view(-1, self.feats*self.win_size) 158 | MSE = torch.mean(self.criterion(y_pred, d), axis=-1) 159 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) 160 | loss = torch.mean(MSE + self.model.beta * KLD) 161 | 162 | avg_loss_val += loss.cpu().item() 163 | loop.set_description( 164 | f"Validation Epoch [{epoch}/{self.epochs}]" 165 | ) 166 | loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) 167 | 168 | self.scheduler.step() 169 | if len(valid_loader) > 0: 170 | avg_loss = avg_loss_val / len(valid_loader) 171 | else: 172 | avg_loss = avg_loss / len(train_loader) 173 | self.early_stopping(avg_loss, self.model) 174 | if self.early_stopping.early_stop: 175 | print(" Early stopping<<<") 176 | break 177 | 178 | def decision_function(self, data): 179 | test_loader = DataLoader( 180 | dataset=ReconstructDataset(data, window_size=self.win_size), 181 | batch_size=self.batch_size, 182 | shuffle=False 183 | ) 184 | 185 | self.model.eval() 186 | scores = [] 187 | y_preds = [] 188 | loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) 189 | 190 | with torch.no_grad(): 191 | for idx, (d, _) in loop: 192 | d = d.to(self.device) 193 | # print('d: ', d.shape) 194 | 195 | y_pred, _, _, hidden = self.model(d, hidden if idx else None) 196 | y_preds.append(y_pred) 197 | d = d.view(-1, self.feats*self.win_size) 198 | 199 | # print('y_pred: ', y_pred.shape) 200 | # print('d: ', d.shape) 201 | loss = torch.mean(self.criterion(y_pred, d), axis=-1) 202 | # print('loss: ', loss.shape) 203 | 204 | scores.append(loss.cpu()) 205 | 206 | scores = torch.cat(scores, dim=0) 207 | scores = scores.numpy() 208 | 209 | 
self.__anomaly_score = scores 210 | 211 | if self.__anomaly_score.shape[0] < len(data): 212 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 213 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 214 | 215 | return self.__anomaly_score 216 | 217 | def anomaly_score(self) -> np.ndarray: 218 | return self.__anomaly_score 219 | 220 | def param_statistic(self, save_file): 221 | pass 222 | -------------------------------------------------------------------------------- /TSB_AD/models/README.md: -------------------------------------------------------------------------------- 1 | ### Extra Installation Direction 2 | 3 | If you want to use [Chronos](https://github.com/amazon-science/chronos-forecasting), please install the following 4 | ```bash 5 | git clone https://github.com/autogluon/autogluon 6 | cd autogluon && pip install -e timeseries/[TimeSeriesDataFrame,TimeSeriesPredictor] 7 | ``` 8 | 9 | If you want to use [MOMENT](https://github.com/moment-timeseries-foundation-model/moment), please install the following 10 | ```bash 11 | pip install momentfm # only support Python 3.11 for now 12 | ``` 13 | 14 | If you want to use [TimesFM](https://github.com/google-research/timesfm), please install the following 15 | ```bash 16 | pip install timesfm[torch] 17 | ``` 18 | 19 | If you want to use [Lag-Llama](https://github.com/time-series-foundation-models/lag-llama), please install the following 20 | ```bash 21 | gluonts[torch]<=0.14.4 22 | ``` 23 | and download the checkpoint from [Link](https://github.com/time-series-foundation-models/lag-llama) and add the path to [Lag_Llama.py](https://github.com/TheDatumOrg/TSB-AD/blob/main/TSB_AD/models/Lag_Llama.py). -------------------------------------------------------------------------------- /TSB_AD/models/RobustPCA.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | from sklearn.decomposition import PCA 11 | from typing import Optional 12 | 13 | from .base import BaseDetector 14 | from sklearn.utils.validation import check_is_fitted 15 | from sklearn.utils.validation import check_array 16 | from scipy.spatial.distance import cdist 17 | 18 | class Robust_PCA: 19 | def __init__(self, D, mu=None, lmbda=None): 20 | self.D = D 21 | self.S = np.zeros(self.D.shape) 22 | self.Y = np.zeros(self.D.shape) 23 | 24 | if mu: 25 | self.mu = mu 26 | else: 27 | self.mu = np.prod(self.D.shape) / (4 * np.linalg.norm(self.D, ord=1)) 28 | 29 | self.mu_inv = 1 / self.mu 30 | 31 | if lmbda: 32 | self.lmbda = lmbda 33 | else: 34 | self.lmbda = 1 / np.sqrt(np.max(self.D.shape)) 35 | 36 | @staticmethod 37 | def frobenius_norm(M): 38 | return np.linalg.norm(M, ord='fro') 39 | 40 | @staticmethod 41 | def shrink(M, tau): 42 | return np.sign(M) * np.maximum((np.abs(M) - tau), np.zeros(M.shape)) 43 | 44 | def svd_threshold(self, M, tau): 45 | U, S, V = np.linalg.svd(M, full_matrices=False) 46 | return np.dot(U, np.dot(np.diag(self.shrink(S, tau)), V)) 47 | 48 | def fit(self, tol=None, max_iter=1000, iter_print=100): 49 | iter = 0 50 | err = np.Inf 51 | Sk = self.S 52 | Yk = self.Y 53 | Lk = np.zeros(self.D.shape) 54 | 55 | if tol: 56 | _tol = tol 57 | else: 58 | _tol = 1E-7 * self.frobenius_norm(self.D) 59 | 
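# The loop below alternates three closed-form updates (augmented-Lagrangian style),
# recovering a low-rank component L and a sparse component S of the observed matrix D,
# with dual variable Y and penalty weight mu:
#   Lk <- SVT_{1/mu}( D - Sk + Yk/mu )            singular-value thresholding
#   Sk <- shrink_{lambda/mu}( D - Lk + Yk/mu )    elementwise soft-thresholding
#   Yk <- Yk + mu * (D - Lk - Sk)                 multiplier (dual) update
# It stops when ||D - Lk - Sk||_F falls below the tolerance or max_iter is reached.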
60 | #this loop implements the principal component pursuit (PCP) algorithm 61 | #located in the table on page 29 of https://arxiv.org/pdf/0912.3599.pdf 62 | while (err > _tol) and iter < max_iter: 63 | Lk = self.svd_threshold( 64 | self.D - Sk + self.mu_inv * Yk, self.mu_inv) #this line implements step 3 65 | Sk = self.shrink( 66 | self.D - Lk + (self.mu_inv * Yk), self.mu_inv * self.lmbda) #this line implements step 4 67 | Yk = Yk + self.mu * (self.D - Lk - Sk) #this line implements step 5 68 | err = self.frobenius_norm(self.D - Lk - Sk) 69 | iter += 1 70 | if (iter % iter_print) == 0 or iter == 1 or iter > max_iter or err <= _tol: 71 | print('iteration: {0}, error: {1}'.format(iter, err)) 72 | 73 | self.L = Lk 74 | self.S = Sk 75 | return Lk, Sk 76 | 77 | class RobustPCA(BaseDetector): 78 | def __init__(self, max_iter: int = 1000, n_components = None, zero_pruning = True): 79 | self.pca: Optional[PCA] = None 80 | self.max_iter = max_iter 81 | self.n_components = n_components 82 | self.zero_pruning = zero_pruning 83 | 84 | def fit(self, X, y=None): 85 | 86 | if self.zero_pruning: 87 | non_zero_columns = np.any(X != 0, axis=0) 88 | X = X[:, non_zero_columns] 89 | 90 | rpca = Robust_PCA(X) 91 | L, S = rpca.fit(max_iter=self.max_iter) 92 | self.detector_ = PCA(n_components=L.shape[1]) 93 | self.detector_.fit(L) 94 | self.decision_scores_ = self.decision_function(L) 95 | return self 96 | 97 | # def decision_function(self, X): 98 | # check_is_fitted(self, ['detector_']) 99 | # X_transformed = self.detector_.transform(X) # Transform the data into the PCA space 100 | # X_reconstructed = self.detector_.inverse_transform(X_transformed) # Reconstruct the data from the PCA space 101 | # anomaly_scores = np.linalg.norm(X - X_reconstructed, axis=1) # Compute the Euclidean norm between original and reconstructed data 102 | # return anomaly_scores 103 | 104 | def decision_function(self, X): 105 | assert self.detector_, "Please train PCA before running the detection!" 106 | 107 | L = self.detector_.transform(X) 108 | S = np.absolute(X - L) 109 | return S.sum(axis=1) 110 | -------------------------------------------------------------------------------- /TSB_AD/models/SR.py: -------------------------------------------------------------------------------- 1 | """Spectral Residual 2 | """ 3 | # Author: Andreas Mueller 4 | import numpy as np 5 | 6 | def SR(X, window_size): 7 | X = (X - X.min()) / (X.max() - X.min()) 8 | X = X.ravel() 9 | fft = np.fft.fft(X) 10 | 11 | amp = np.abs(fft) 12 | log_amp = np.log(amp) 13 | phase = np.angle(fft) 14 | # split spectrum into bias term and symmetric frequencies 15 | bias, sym_freq = log_amp[:1], log_amp[1:] 16 | # select just the first half of the sym_freq 17 | freq = sym_freq[:(len(sym_freq) + 1) // 2] 18 | window_amp = 100 19 | 20 | pad_left = (window_amp - 1) // 2 21 | padded_freq = np.concatenate([np.tile(X[0], pad_left), freq, np.tile(X[-1], window_amp - pad_left - 1)]) 22 | conv_amp = np.ones(window_amp) / window_amp 23 | ma_freq = np.convolve(padded_freq, conv_amp, 'valid') 24 | # construct moving average log amplitude spectrum 25 | ma_log_amp = np.concatenate([ 26 | bias, 27 | ma_freq, 28 | (ma_freq[:-1] if len(sym_freq) % 2 == 1 else ma_freq)[::-1] 29 | ]) 30 | assert ma_log_amp.shape[0] == log_amp.shape[0], "`ma_log_amp` size does not match `log_amp` size." 
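# Spectral-residual saliency: subtract the smoothed (moving-average) log-amplitude
# spectrum from the raw one, keep the original phase, and inverse-FFT the result.
# The magnitude of that reconstructed signal is large wherever the series deviates
# from its dominant spectral structure, and it is returned directly as the score.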
31 | # compute residual spectrum and transform back to time domain 32 | res_amp = log_amp - ma_log_amp 33 | sr = np.abs(np.fft.ifft(np.exp(res_amp + 1j * phase))) 34 | return sr -------------------------------------------------------------------------------- /TSB_AD/models/Series2Graph.txt: -------------------------------------------------------------------------------- 1 | # Algorithms protected by patent. Code protected by copyright and provided 2 | # as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from 3 | # the academia may use this code only for academic research purposes, 4 | # provided that the authors are properly acknowledged using the citations 5 | # below. Users from the industry may test and evaluate this code by 6 | # requesting a license. 7 | -------------------------------------------------------------------------------- /TSB_AD/models/TimesFM.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [timesfm] by [siriuz42 et al.] 3 | Original source: [https://github.com/google-research/timesfm] 4 | """ 5 | 6 | import timesfm 7 | import numpy as np 8 | 9 | class TimesFM(): 10 | def __init__(self, 11 | win_size=96, 12 | prediction_length=1, 13 | input_c=1): 14 | 15 | self.model_name = 'TimesFM' 16 | self.win_size = win_size 17 | self.prediction_length = prediction_length 18 | self.input_c = input_c 19 | self.score_list = [] 20 | 21 | def fit(self, data): 22 | 23 | for channel in range(self.input_c): 24 | 25 | data_channel = data[:, channel].reshape(-1, 1) 26 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) 27 | # print('data_win: ', data_win.shape) # (2330, 100) 28 | # print('data_target: ', data_target.shape) # (2330, 1) 29 | 30 | # tfm = timesfm.TimesFm( 31 | # context_len=self.win_size, 32 | # horizon_len=self.prediction_length, 33 | # input_patch_len=32, 34 | # output_patch_len=128, 35 | # num_layers=20, 36 | # model_dims=1280, 37 | # backend="gpu") 38 | # tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m") 39 | 40 | tfm = timesfm.TimesFm( 41 | hparams=timesfm.TimesFmHparams( 42 | backend="gpu", 43 | per_core_batch_size=32, 44 | horizon_len=self.prediction_length, 45 | ), 46 | checkpoint=timesfm.TimesFmCheckpoint( 47 | huggingface_repo_id="google/timesfm-1.0-200m-pytorch"), 48 | ) 49 | 50 | forecast_input = [data_win[i, :] for i in range(data_win.shape[0])] 51 | point_forecast, _ = tfm.forecast(forecast_input) 52 | 53 | print('predictions: ', point_forecast.shape) 54 | 55 | ### using mse as the anomaly score 56 | scores = (data_target.squeeze() - point_forecast.squeeze()) ** 2 57 | # scores = np.mean(scores, axis=1) 58 | self.score_list.append(scores) 59 | 60 | scores_merge = np.mean(np.array(self.score_list), axis=0) 61 | # print('scores_merge: ', scores_merge.shape) 62 | 63 | padded_decision_scores = np.zeros(len(data)) 64 | padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] 65 | padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge 66 | 67 | self.decision_scores_ = padded_decision_scores 68 | 69 | 70 | def decision_function(self, X): 71 | """ 72 | Not used, present for API consistency by convention. 
73 | """ 74 | pass 75 | 76 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 77 | Xs, ys = [], [] 78 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 79 | 80 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 81 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 82 | 83 | x = tmp[:slidingWindow] 84 | y = tmp[slidingWindow:] 85 | Xs.append(x) 86 | ys.append(y) 87 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/USAD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [usad] by [manigalati] 3 | Original source: [https://github.com/manigalati/usad] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import math 11 | import torch 12 | import torch.nn.functional as F 13 | from sklearn.utils import check_array 14 | from sklearn.utils.validation import check_is_fitted 15 | from torch import nn 16 | from torch.utils.data import DataLoader 17 | from sklearn.preprocessing import MinMaxScaler 18 | import tqdm 19 | 20 | from .base import BaseDetector 21 | from ..utils.dataset import ReconstructDataset 22 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 23 | 24 | class USADModel(nn.Module): 25 | def __init__(self, feats, n_window=5): 26 | super(USADModel, self).__init__() 27 | self.name = 'USAD' 28 | self.lr = 0.0001 29 | self.n_feats = feats 30 | self.n_hidden = 16 31 | self.n_latent = 5 32 | self.n_window = n_window # USAD w_size = 5 33 | self.n = self.n_feats * self.n_window 34 | self.encoder = nn.Sequential( 35 | nn.Flatten(), 36 | nn.Linear(self.n, self.n_hidden), nn.ReLU(True), 37 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 38 | nn.Linear(self.n_hidden, self.n_latent), nn.ReLU(True), 39 | ) 40 | self.decoder1 = nn.Sequential( 41 | nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), 42 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 43 | nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), 44 | ) 45 | self.decoder2 = nn.Sequential( 46 | nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), 47 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 48 | nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), 49 | ) 50 | 51 | def forward(self, g): 52 | bs = g.shape[0] 53 | ## Encode 54 | # z = self.encoder(g.view(1,-1)) 55 | z = self.encoder(g.view(bs, self.n)) 56 | ## Decoders (Phase 1) 57 | ae1 = self.decoder1(z) 58 | ae2 = self.decoder2(z) 59 | ## Encode-Decode (Phase 2) 60 | ae2ae1 = self.decoder2(self.encoder(ae1)) 61 | # return ae1.view(-1), ae2.view(-1), ae2ae1.view(-1) 62 | return ae1.view(bs, self.n), ae2.view(bs, self.n), ae2ae1.view(bs, self.n) 63 | 64 | 65 | class USAD(BaseDetector): 66 | def __init__(self, 67 | win_size = 5, 68 | feats = 1, 69 | batch_size = 128, 70 | epochs = 10, 71 | patience = 3, 72 | lr = 1e-4, 73 | validation_size=0.2 74 | ): 75 | super().__init__() 76 | 77 | self.__anomaly_score = None 78 | 79 | self.cuda = True 80 | self.device = get_gpu(self.cuda) 81 | 82 | self.win_size = win_size 83 | self.batch_size = batch_size 84 | self.epochs = epochs 85 | self.feats = feats 86 | self.validation_size = validation_size 87 | 88 | self.model = USADModel(feats=self.feats, n_window=self.win_size).to(self.device) 89 | self.optimizer = torch.optim.AdamW( 90 | self.model.parameters(), lr=lr, weight_decay=1e-5 91 | ) 92 | self.scheduler = 
torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) 93 | self.criterion = nn.MSELoss(reduction = 'none') 94 | 95 | self.early_stopping = EarlyStoppingTorch(None, patience=patience) 96 | 97 | def fit(self, data): 98 | tsTrain = data[:int((1-self.validation_size)*len(data))] 99 | tsValid = data[int((1-self.validation_size)*len(data)):] 100 | 101 | train_loader = DataLoader( 102 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 103 | batch_size=self.batch_size, 104 | shuffle=True 105 | ) 106 | 107 | valid_loader = DataLoader( 108 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 109 | batch_size=self.batch_size, 110 | shuffle=False 111 | ) 112 | 113 | l1s, l2s = [], [] 114 | for epoch in range(1, self.epochs + 1): 115 | self.model.train(mode=True) 116 | n = epoch + 1 117 | avg_loss = 0 118 | loop = tqdm.tqdm( 119 | enumerate(train_loader), total=len(train_loader), leave=True 120 | ) 121 | for idx, (d, _) in loop: 122 | d = d.to(self.device) # (bs, win, feat) 123 | # print('d: ', d.shape) 124 | 125 | ae1s, ae2s, ae2ae1s = self.model(d) 126 | # print('ae2ae1s: ', ae2ae1s.shape) 127 | 128 | d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) 129 | 130 | l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) 131 | l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) 132 | # print('l1: ', l1.shape) 133 | 134 | l1s.append(torch.mean(l1).item()) 135 | l2s.append(torch.mean(l2).item()) 136 | loss = torch.mean(l1 + l2) 137 | 138 | self.optimizer.zero_grad() 139 | loss.backward() 140 | self.optimizer.step() 141 | 142 | avg_loss += loss.cpu().item() 143 | loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") 144 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) 145 | 146 | if len(valid_loader) > 0: 147 | self.model.eval() 148 | avg_loss_val = 0 149 | loop = tqdm.tqdm( 150 | enumerate(valid_loader), total=len(valid_loader), leave=True 151 | ) 152 | with torch.no_grad(): 153 | for idx, (d, _) in loop: 154 | d = d.to(self.device) 155 | ae1s, ae2s, ae2ae1s = self.model(d) 156 | d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) 157 | 158 | l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) 159 | l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) 160 | 161 | l1s.append(torch.mean(l1).item()) 162 | l2s.append(torch.mean(l2).item()) 163 | loss = torch.mean(l1 + l2) 164 | avg_loss_val += loss.cpu().item() 165 | loop.set_description( 166 | f"Validation Epoch [{epoch}/{self.epochs}]" 167 | ) 168 | loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) 169 | 170 | self.scheduler.step() 171 | if len(valid_loader) > 0: 172 | avg_loss = avg_loss_val / len(valid_loader) 173 | else: 174 | avg_loss = avg_loss / len(train_loader) 175 | self.early_stopping(avg_loss, self.model) 176 | if self.early_stopping.early_stop: 177 | print(" Early stopping<<<") 178 | break 179 | 180 | def decision_function(self, data): 181 | test_loader = DataLoader( 182 | dataset=ReconstructDataset(data, window_size=self.win_size), 183 | batch_size=self.batch_size, 184 | shuffle=False 185 | ) 186 | 187 | self.model.eval() 188 | scores = [] 189 | loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) 190 | 191 | with torch.no_grad(): 192 | for idx, (d, _) in loop: 193 | d = d.to(self.device) 194 | # print('d: ', d.shape) 195 | 196 | ae1, ae2, ae2ae1 = self.model(d) 197 | d = d.view(ae2ae1.shape[0], self.feats*self.win_size) 198 | 
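# Per-window anomaly score used below: a weighted sum of the reconstruction
# errors from USAD's two adversarially trained autoencoders,
#   score(w) = alpha * ||AE1(w) - w||^2 + beta * ||AE2(AE1(w)) - w||^2,
# with alpha = 0.1 and beta = 0.9, averaged over the flattened window.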
199 | # print('ae2ae1: ', ae2ae1.shape) 200 | # print('d: ', d.shape) 201 | 202 | loss = 0.1 * self.criterion(ae1, d) + 0.9 * self.criterion(ae2ae1, d) 203 | # print('loss: ', loss.shape) 204 | loss = torch.mean(loss, axis=-1) 205 | 206 | scores.append(loss.cpu()) 207 | 208 | scores = torch.cat(scores, dim=0) 209 | scores = scores.numpy() 210 | 211 | self.__anomaly_score = scores 212 | 213 | if self.__anomaly_score.shape[0] < len(data): 214 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 215 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 216 | 217 | return self.__anomaly_score 218 | 219 | def anomaly_score(self) -> np.ndarray: 220 | return self.__anomaly_score 221 | 222 | def param_statistic(self, save_file): 223 | pass 224 | -------------------------------------------------------------------------------- /TSB_AD/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/models/__init__.py -------------------------------------------------------------------------------- /TSB_AD/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/utils/__init__.py -------------------------------------------------------------------------------- /TSB_AD/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import numpy as np 4 | epsilon = 1e-8 5 | 6 | class ReconstructDataset(torch.utils.data.Dataset): 7 | def __init__(self, data, window_size, stride=1, normalize=True): 8 | super().__init__() 9 | self.window_size = window_size 10 | self.stride = stride 11 | self.data = self._normalize_data(data) if normalize else data 12 | 13 | self.univariate = self.data.shape[1] == 1 14 | self.sample_num = max(0, (self.data.shape[0] - window_size) // stride + 1) 15 | self.samples, self.targets = self._generate_samples() 16 | 17 | def _normalize_data(self, data, epsilon=1e-8): 18 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 19 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 20 | return (data - mean) / std 21 | 22 | def _generate_samples(self): 23 | data = torch.tensor(self.data, dtype=torch.float32) 24 | 25 | if self.univariate: 26 | data = data.squeeze() 27 | X = torch.stack([data[i * self.stride : i * self.stride + self.window_size] for i in range(self.sample_num)]) 28 | X = X.unsqueeze(-1) 29 | else: 30 | X = torch.stack([data[i * self.stride : i * self.stride + self.window_size, :] for i in range(self.sample_num)]) 31 | 32 | return X, X 33 | 34 | def __len__(self): 35 | return self.sample_num 36 | 37 | def __getitem__(self, index): 38 | return self.samples[index], self.targets[index] 39 | 40 | class ForecastDataset(torch.utils.data.Dataset): 41 | def __init__(self, data, window_size, pred_len, stride=1, normalize=True): 42 | super().__init__() 43 | self.window_size = window_size 44 | self.pred_len = pred_len 45 | self.stride = stride 46 | self.data = self._normalize_data(data) if normalize else data 47 | 48 | self.univariate = self.data.shape[1] == 1 49 | self.sample_num = max((self.data.shape[0] - window_size - pred_len) // stride + 1, 0) 50 | 51 | # Generate samples efficiently 52 | self.samples, self.targets = 
self._generate_samples() 53 | 54 | def _normalize_data(self, data, epsilon=1e-8): 55 | """ Normalize data using mean and standard deviation. """ 56 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 57 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 58 | return (data - mean) / std 59 | 60 | def _generate_samples(self): 61 | """ Generate windowed samples efficiently using vectorized slicing. """ 62 | data = torch.tensor(self.data, dtype=torch.float32) 63 | 64 | indices = np.arange(0, self.sample_num * self.stride, self.stride) 65 | 66 | X = torch.stack([data[i : i + self.window_size] for i in indices]) 67 | Y = torch.stack([data[i + self.window_size : i + self.window_size + self.pred_len] for i in indices]) 68 | 69 | return X, Y # Inputs & targets 70 | 71 | def __len__(self): 72 | return self.sample_num 73 | 74 | def __getitem__(self, index): 75 | return self.samples[index], self.targets[index] 76 | 77 | class TSDataset(torch.utils.data.Dataset): 78 | 79 | def __init__(self, X, y=None, mean=None, std=None): 80 | super(TSDataset, self).__init__() 81 | self.X = X 82 | self.mean = mean 83 | self.std = std 84 | 85 | def __len__(self): 86 | return self.X.shape[0] 87 | 88 | def __getitem__(self, idx): 89 | if torch.is_tensor(idx): 90 | idx = idx.tolist() 91 | sample = self.X[idx, :] 92 | 93 | if self.mean is not None and self.std is not None: 94 | sample = (sample - self.mean) / self.std 95 | # assert_almost_equal (0, sample.mean(), decimal=1) 96 | 97 | return torch.from_numpy(sample), idx 98 | 99 | 100 | class ReconstructDataset_Moment(torch.utils.data.Dataset): 101 | def __init__(self, data, window_size, stride=1, normalize=True): 102 | super().__init__() 103 | self.window_size = window_size 104 | self.stride = stride 105 | self.data = self._normalize_data(data) if normalize else data 106 | 107 | self.univariate = self.data.shape[1] == 1 108 | self.sample_num = max((self.data.shape[0] - window_size) // stride + 1, 0) 109 | 110 | self.samples = self._generate_samples() 111 | self.input_mask = np.ones(self.window_size, dtype=np.float32) # Fixed input mask 112 | 113 | def _normalize_data(self, data, epsilon=1e-8): 114 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 115 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 116 | return (data - mean) / std 117 | 118 | def _generate_samples(self): 119 | data = torch.tensor(self.data, dtype=torch.float32) 120 | indices = np.arange(0, self.sample_num * self.stride, self.stride) 121 | 122 | if self.univariate: 123 | X = torch.stack([data[i : i + self.window_size] for i in indices]) 124 | else: 125 | X = torch.stack([data[i : i + self.window_size, :] for i in indices]) 126 | 127 | return X 128 | 129 | def __len__(self): 130 | return self.sample_num 131 | 132 | def __getitem__(self, index): 133 | return self.samples[index], self.input_mask -------------------------------------------------------------------------------- /TSB_AD/utils/slidingWindows.py: -------------------------------------------------------------------------------- 1 | from statsmodels.tsa.stattools import acf 2 | from scipy.signal import argrelextrema 3 | import numpy as np 4 | from statsmodels.graphics.tsaplots import plot_acf 5 | 6 | # determine sliding window (period) based on ACF 7 | def find_length_rank(data, rank=1): 8 | data = data.squeeze() 9 | if len(data.shape)>1: return 0 10 | if rank==0: return 1 11 | data = data[:min(20000, len(data))] 12 | 13 | base = 3 14 | auto_corr = acf(data, nlags=400, fft=True)[base:] 15 | 16 | # 
plot_acf(data, lags=400, fft=True) 17 | # plt.xlabel('Lags') 18 | # plt.ylabel('Autocorrelation') 19 | # plt.title('Autocorrelation Function (ACF)') 20 | # plt.savefig('/data/liuqinghua/code/ts/TSAD-AutoML/AutoAD_Solution/candidate_pool/cd_diagram/ts_acf.png') 21 | 22 | local_max = argrelextrema(auto_corr, np.greater)[0] 23 | 24 | # print('auto_corr: ', auto_corr) 25 | # print('local_max: ', local_max) 26 | 27 | try: 28 | # max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) 29 | sorted_local_max = np.argsort([auto_corr[lcm] for lcm in local_max])[::-1] # Ascending order 30 | max_local_max = sorted_local_max[0] # Default 31 | if rank == 1: max_local_max = sorted_local_max[0] 32 | if rank == 2: 33 | for i in sorted_local_max[1:]: 34 | if i > sorted_local_max[0]: 35 | max_local_max = i 36 | break 37 | if rank == 3: 38 | for i in sorted_local_max[1:]: 39 | if i > sorted_local_max[0]: 40 | id_tmp = i 41 | break 42 | for i in sorted_local_max[id_tmp:]: 43 | if i > sorted_local_max[id_tmp]: 44 | max_local_max = i 45 | break 46 | # print('sorted_local_max: ', sorted_local_max) 47 | # print('max_local_max: ', max_local_max) 48 | if local_max[max_local_max]<3 or local_max[max_local_max]>300: 49 | return 125 50 | return local_max[max_local_max]+base 51 | except: 52 | return 125 53 | 54 | 55 | # determine sliding window (period) based on ACF, Original version 56 | def find_length(data): 57 | if len(data.shape)>1: 58 | return 0 59 | data = data[:min(20000, len(data))] 60 | 61 | base = 3 62 | auto_corr = acf(data, nlags=400, fft=True)[base:] 63 | 64 | 65 | local_max = argrelextrema(auto_corr, np.greater)[0] 66 | try: 67 | max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) 68 | if local_max[max_local_max]<3 or local_max[max_local_max]>300: 69 | return 125 70 | return local_max[max_local_max]+base 71 | except: 72 | return 125 73 | -------------------------------------------------------------------------------- /TSB_AD/utils/stat_models.py: -------------------------------------------------------------------------------- 1 | """ A collection of statistical models 2 | code copied from pyod documentation https://github.com/yzhao062/pyod/blob/master/pyod/utils/stat_models.py 3 | """ 4 | # Author: Yue Zhao 5 | # License: BSD 2 clause 6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from scipy.stats import pearsonr 12 | from sklearn.utils.validation import check_array 13 | from sklearn.utils.validation import check_consistent_length 14 | from numba import njit 15 | 16 | def pairwise_distances_no_broadcast(X, Y): 17 | """Utility function to calculate row-wise euclidean distance of two matrix. 18 | Different from pair-wise calculation, this function would not broadcast. 19 | For instance, X and Y are both (4,3) matrices, the function would return 20 | a distance vector with shape (4,), instead of (4,4). 
21 | Parameters 22 | ---------- 23 | X : array of shape (n_samples, n_features) 24 | First input samples 25 | Y : array of shape (n_samples, n_features) 26 | Second input samples 27 | Returns 28 | ------- 29 | distance : array of shape (n_samples,) 30 | Row-wise euclidean distance of X and Y 31 | """ 32 | X = check_array(X) 33 | Y = check_array(Y) 34 | 35 | if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]: 36 | raise ValueError("pairwise_distances_no_broadcast function receive" 37 | "matrix with different shapes {0} and {1}".format( 38 | X.shape, Y.shape)) 39 | return _pairwise_distances_no_broadcast_helper(X, Y) 40 | 41 | 42 | def _pairwise_distances_no_broadcast_helper(X, Y): # pragma: no cover 43 | """Internal function for calculating the distance with numba. Do not use. 44 | Parameters 45 | ---------- 46 | X : array of shape (n_samples, n_features) 47 | First input samples 48 | Y : array of shape (n_samples, n_features) 49 | Second input samples 50 | Returns 51 | ------- 52 | distance : array of shape (n_samples,) 53 | Intermediate results. Do not use. 54 | """ 55 | euclidean_sq = np.square(Y - X) 56 | return np.sqrt(np.sum(euclidean_sq, axis=1)).ravel() 57 | 58 | 59 | def wpearsonr(x, y, w=None): 60 | """Utility function to calculate the weighted Pearson correlation of two 61 | samples. 62 | See https://stats.stackexchange.com/questions/221246/such-thing-as-a-weighted-correlation 63 | for more information 64 | Parameters 65 | ---------- 66 | x : array, shape (n,) 67 | Input x. 68 | y : array, shape (n,) 69 | Input y. 70 | w : array, shape (n,) 71 | Weights w. 72 | Returns 73 | ------- 74 | scores : float in range of [-1,1] 75 | Weighted Pearson Correlation between x and y. 76 | """ 77 | 78 | # unweighted version 79 | # note the return is different 80 | # TODO: fix output differences 81 | if w is None: 82 | return pearsonr(x, y) 83 | 84 | x = np.asarray(x) 85 | y = np.asarray(y) 86 | w = np.asarray(w) 87 | 88 | check_consistent_length([x, y, w]) 89 | # n = len(x) 90 | 91 | w_sum = w.sum() 92 | mx = np.sum(x * w) / w_sum 93 | my = np.sum(y * w) / w_sum 94 | 95 | xm, ym = (x - mx), (y - my) 96 | 97 | r_num = np.sum(xm * ym * w) / w_sum 98 | 99 | xm2 = np.sum(xm * xm * w) / w_sum 100 | ym2 = np.sum(ym * ym * w) / w_sum 101 | 102 | r_den = np.sqrt(xm2 * ym2) 103 | r = r_num / r_den 104 | 105 | r = max(min(r, 1.0), -1.0) 106 | 107 | # TODO: disable p value calculation due to python 2.7 break 108 | # df = n_train_ - 2 109 | # 110 | # if abs(r) == 1.0: 111 | # prob = 0.0 112 | # else: 113 | # t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r))) 114 | # prob = _betai(0.5 * df, 0.5, df / (df + t_squared)) 115 | return r # , prob 116 | 117 | 118 | ##################################### 119 | # PROBABILITY CALCULATIONS # 120 | ##################################### 121 | 122 | # TODO: disable p value calculation due to python 2.7 break 123 | # def _betai(a, b, x): 124 | # x = np.asarray(x) 125 | # x = np.where(x < 1.0, x, 1.0) # if x > 1 then return 1.0 126 | # return betainc(a, b, x) 127 | 128 | 129 | def pearsonr_mat(mat, w=None): 130 | """Utility function to calculate pearson matrix (row-wise). 131 | Parameters 132 | ---------- 133 | mat : numpy array of shape (n_samples, n_features) 134 | Input matrix. 135 | w : numpy array of shape (n_features,) 136 | Weights. 137 | Returns 138 | ------- 139 | pear_mat : numpy array of shape (n_samples, n_samples) 140 | Row-wise pearson score matrix. 
141 | """ 142 | mat = check_array(mat) 143 | n_row = mat.shape[0] 144 | n_col = mat.shape[1] 145 | pear_mat = np.full([n_row, n_row], 1).astype(float) 146 | 147 | if w is not None: 148 | for cx in range(n_row): 149 | for cy in range(cx + 1, n_row): 150 | curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w) 151 | pear_mat[cx, cy] = curr_pear 152 | pear_mat[cy, cx] = curr_pear 153 | else: 154 | for cx in range(n_col): 155 | for cy in range(cx + 1, n_row): 156 | curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0] 157 | pear_mat[cx, cy] = curr_pear 158 | pear_mat[cy, cx] = curr_pear 159 | 160 | return pear_mat 161 | 162 | def column_ecdf(matrix: np.ndarray) -> np.ndarray: 163 | """ 164 | Utility function to compute the column wise empirical cumulative distribution of a 2D feature matrix, 165 | where the rows are samples and the columns are features per sample. The accumulation is done in the positive 166 | direction of the sample axis. 167 | 168 | E.G. 169 | p(1) = 0.2, p(0) = 0.3, p(2) = 0.1, p(6) = 0.4 170 | ECDF E(5) = p(x <= 5) 171 | ECDF E would be E(-1) = 0, E(0) = 0.3, E(1) = 0.5, E(2) = 0.6, E(3) = 0.6, E(4) = 0.6, E(5) = 0.6, E(6) = 1 172 | 173 | Similar to and tested against: 174 | https://www.statsmodels.org/stable/generated/statsmodels.distributions.empirical_distribution.ECDF.html 175 | 176 | Returns 177 | ------- 178 | 179 | """ 180 | # check the matrix dimensions 181 | assert len(matrix.shape) == 2, 'Matrix needs to be two dimensional for the ECDF computation.' 182 | 183 | # create a probability array the same shape as the feature matrix which we will reorder to build 184 | # the ecdf 185 | probabilities = np.linspace(np.ones(matrix.shape[1]) / matrix.shape[0], np.ones(matrix.shape[1]), matrix.shape[0]) 186 | 187 | # get the sorting indices for a numpy array 188 | sort_idx = np.argsort(matrix, axis=0) 189 | 190 | # sort the numpy array, as we need to look for duplicates in the feature values (that would have different 191 | # probabilities if we would just resort the probabilities array) 192 | matrix = np.take_along_axis(matrix, sort_idx, axis=0) 193 | 194 | # deal with equal values 195 | ecdf_terminate_equals_inplace(matrix, probabilities) 196 | 197 | # return the resorted accumulated probabilities (by reverting the sorting of the input matrix) 198 | # looks a little complicated but is faster this way 199 | reordered_probabilities = np.ones_like(probabilities) 200 | np.put_along_axis(reordered_probabilities, sort_idx, probabilities, axis=0) 201 | return reordered_probabilities 202 | 203 | @njit 204 | def ecdf_terminate_equals_inplace(matrix: np.ndarray, probabilities: np.ndarray): 205 | """ 206 | This is a helper function for computing the ecdf of an array. It has been outsourced from the original 207 | function in order to be able to use the njit compiler of numpy for increased speeds, as it unfortunately 208 | needs a loop over all rows and columns of a matrix. It acts in place on the probabilities' matrix. 209 | 210 | Parameters 211 | ---------- 212 | matrix : a feature matrix where the rows are samples and each column is a feature !(expected to be sorted)! 213 | 214 | probabilities : a probability matrix that will be used building the ecdf. It has values between 0 and 1 and 215 | is also sorted. 
216 | 217 | Returns 218 | ------- 219 | 220 | """ 221 | for cx in range(probabilities.shape[1]): 222 | for rx in range(probabilities.shape[0] - 2, -1, -1): 223 | if matrix[rx, cx] == matrix[rx + 1, cx]: 224 | probabilities[rx, cx] = probabilities[rx + 1, cx] -------------------------------------------------------------------------------- /TSB_AD/utils/torch_utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | import subprocess as sp 5 | import os, math 6 | 7 | class EarlyStoppingTorch: 8 | """Early stops the training if validation loss doesn't improve after a given patience.""" 9 | def __init__(self, save_path=None, patience=7, verbose=False, delta=0.0001): 10 | """ 11 | Args: 12 | save_path : 13 | patience (int): How long to wait after last time validation loss improved. 14 | Default: 7 15 | verbose (bool): If True, prints a message for each validation loss improvement. 16 | Default: False 17 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 18 | Default: 0 19 | """ 20 | self.save_path = save_path 21 | self.patience = patience 22 | self.verbose = verbose 23 | self.counter = 0 24 | self.best_score = None 25 | self.early_stop = False 26 | self.val_loss_min = np.Inf 27 | self.delta = delta 28 | 29 | def __call__(self, val_loss, model): 30 | 31 | score = -val_loss 32 | 33 | if self.best_score is None: 34 | self.best_score = score 35 | self.save_checkpoint(val_loss, model) 36 | elif score < self.best_score + self.delta: 37 | self.counter += 1 38 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 39 | if self.counter >= self.patience: 40 | self.early_stop = True 41 | else: 42 | self.best_score = score 43 | self.save_checkpoint(val_loss, model) 44 | self.counter = 0 45 | 46 | def save_checkpoint(self, val_loss, model): 47 | '''Saves model when validation loss decrease.''' 48 | if self.verbose: 49 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 50 | if self.save_path: 51 | path = os.path.join(self.save_path, 'best_network.pth') 52 | torch.save(model.state_dict(), path) 53 | self.val_loss_min = val_loss 54 | 55 | class PositionalEmbedding(nn.Module): 56 | def __init__(self, d_model, max_len=5000): 57 | super(PositionalEmbedding, self).__init__() 58 | # Compute the positional encodings once in log space. 
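# In formulas, for position pos and channel index 2i:
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# The div_term below evaluates 10000^(-2i / d_model) via exp/log for numerical stability.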
59 | pe = torch.zeros(max_len, d_model).float() 60 | pe.require_grad = False 61 | 62 | position = torch.arange(0, max_len).float().unsqueeze(1) 63 | div_term = (torch.arange(0, d_model, 2).float() 64 | * -(math.log(10000.0) / d_model)).exp() 65 | 66 | pe[:, 0::2] = torch.sin(position * div_term) 67 | pe[:, 1::2] = torch.cos(position * div_term) 68 | 69 | pe = pe.unsqueeze(0) 70 | self.register_buffer('pe', pe) 71 | 72 | def forward(self, x): 73 | return self.pe[:, :x.size(1)] 74 | 75 | class TokenEmbedding(nn.Module): 76 | def __init__(self, c_in, d_model): 77 | super(TokenEmbedding, self).__init__() 78 | padding = 1 if torch.__version__ >= '1.5.0' else 2 79 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 80 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv1d): 83 | nn.init.kaiming_normal_( 84 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 85 | 86 | def forward(self, x): 87 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 88 | return x 89 | 90 | class TemporalEmbedding(nn.Module): 91 | def __init__(self, d_model, embed_type='fixed', freq='h'): 92 | super(TemporalEmbedding, self).__init__() 93 | 94 | minute_size = 4 95 | hour_size = 24 96 | weekday_size = 7 97 | day_size = 32 98 | month_size = 13 99 | 100 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 101 | if freq == 't': 102 | self.minute_embed = Embed(minute_size, d_model) 103 | self.hour_embed = Embed(hour_size, d_model) 104 | self.weekday_embed = Embed(weekday_size, d_model) 105 | self.day_embed = Embed(day_size, d_model) 106 | self.month_embed = Embed(month_size, d_model) 107 | 108 | def forward(self, x): 109 | x = x.long() 110 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 111 | self, 'minute_embed') else 0. 
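# The time-mark tensor x is expected to carry integer calendar features ordered
# as [month, day, weekday, hour, minute] along its last dimension; each feature
# is looked up in its own embedding table and the lookups are summed
# (the minute embedding is only present when freq == 't').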
112 | hour_x = self.hour_embed(x[:, :, 3]) 113 | weekday_x = self.weekday_embed(x[:, :, 2]) 114 | day_x = self.day_embed(x[:, :, 1]) 115 | month_x = self.month_embed(x[:, :, 0]) 116 | 117 | return hour_x + weekday_x + day_x + month_x + minute_x 118 | 119 | class FixedEmbedding(nn.Module): 120 | def __init__(self, c_in, d_model): 121 | super(FixedEmbedding, self).__init__() 122 | 123 | w = torch.zeros(c_in, d_model).float() 124 | w.require_grad = False 125 | 126 | position = torch.arange(0, c_in).float().unsqueeze(1) 127 | div_term = (torch.arange(0, d_model, 2).float() 128 | * -(math.log(10000.0) / d_model)).exp() 129 | 130 | w[:, 0::2] = torch.sin(position * div_term) 131 | w[:, 1::2] = torch.cos(position * div_term) 132 | 133 | self.emb = nn.Embedding(c_in, d_model) 134 | self.emb.weight = nn.Parameter(w, requires_grad=False) 135 | 136 | def forward(self, x): 137 | return self.emb(x).detach() 138 | 139 | class TimeFeatureEmbedding(nn.Module): 140 | def __init__(self, d_model, embed_type='timeF', freq='h'): 141 | super(TimeFeatureEmbedding, self).__init__() 142 | 143 | freq_map = {'h': 4, 't': 5, 's': 6, 144 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 145 | d_inp = freq_map[freq] 146 | self.embed = nn.Linear(d_inp, d_model, bias=False) 147 | 148 | def forward(self, x): 149 | return self.embed(x) 150 | 151 | class DataEmbedding(nn.Module): 152 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 153 | super(DataEmbedding, self).__init__() 154 | 155 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 156 | self.position_embedding = PositionalEmbedding(d_model=d_model) 157 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 158 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 159 | d_model=d_model, embed_type=embed_type, freq=freq) 160 | self.dropout = nn.Dropout(p=dropout) 161 | 162 | def forward(self, x, x_mark): 163 | if x_mark is None: 164 | x = self.value_embedding(x) + self.position_embedding(x) 165 | else: 166 | x = self.value_embedding( 167 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 168 | return self.dropout(x) 169 | 170 | def adjust_learning_rate(optimizer, epoch, lradj, learning_rate): 171 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 172 | if lradj == 'type1': 173 | lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch - 1) // 1))} 174 | elif lradj == 'type2': 175 | lr_adjust = { 176 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 177 | 10: 5e-7, 15: 1e-7, 20: 5e-8 178 | } 179 | if epoch in lr_adjust.keys(): 180 | lr = lr_adjust[epoch] 181 | for param_group in optimizer.param_groups: 182 | param_group['lr'] = lr 183 | print('Updating learning rate to {}'.format(lr)) 184 | 185 | 186 | def min_memory_id(): 187 | output = sp.check_output(["/usr/bin/nvidia-smi", "--query-gpu=memory.used", "--format=csv"]) 188 | memory = [int(s.split(" ")[0]) for s in output.decode().split("\n")[1:-1]] 189 | assert len(memory) == torch.cuda.device_count() 190 | return np.argmin(memory) 191 | 192 | 193 | def get_gpu(cuda): 194 | if cuda == True and torch.cuda.is_available(): 195 | try: 196 | device = torch.device(f"cuda:{min_memory_id()}") 197 | torch.cuda.set_device(device) 198 | print(f"----- Using GPU {torch.cuda.current_device()} -----") 199 | except: 200 | device = torch.device("cuda") 201 | print(f"----- Using GPU {torch.cuda.get_device_name()} -----") 202 | else: 203 | if cuda == True and not torch.cuda.is_available(): 204 | print("----- GPU is unavailable -----") 205 | device = 
torch.device("cpu") 206 | print("----- Using CPU -----") 207 | return device -------------------------------------------------------------------------------- /assets/fig/readme_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/readme_title.png -------------------------------------------------------------------------------- /assets/fig/readme_title_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/readme_title_2.png -------------------------------------------------------------------------------- /assets/fig/tsb_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/tsb_overview.png -------------------------------------------------------------------------------- /benchmark_exp/HP_Tuning_M.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os 9 | import itertools 10 | from TSB_AD.evaluation.metrics import get_metrics 11 | from TSB_AD.utils.slidingWindows import find_length_rank 12 | from TSB_AD.model_wrapper import * 13 | from TSB_AD.HP_list import Multi_algo_HP_dict 14 | 15 | # seeding 16 | seed = 2024 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | np.random.seed(seed) 21 | random.seed(seed) 22 | torch.backends.cudnn.benchmark = False 23 | torch.backends.cudnn.deterministic = True 24 | 25 | print("CUDA available: ", torch.cuda.is_available()) 26 | print("cuDNN version: ", torch.backends.cudnn.version()) 27 | 28 | if __name__ == '__main__': 29 | 30 | Start_T = time.time() 31 | ## ArgumentParser 32 | parser = argparse.ArgumentParser(description='HP Tuning') 33 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-M/') 34 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-M-Tuning.csv') 35 | parser.add_argument('--save_dir', type=str, default='eval/HP_tuning/multi/') 36 | parser.add_argument('--AD_Name', type=str, default='IForest') 37 | args = parser.parse_args() 38 | 39 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 40 | 41 | Det_HP = Multi_algo_HP_dict[args.AD_Name] 42 | 43 | keys, values = zip(*Det_HP.items()) 44 | combinations = [dict(zip(keys, v)) for v in itertools.product(*values)] 45 | 46 | write_csv = [] 47 | for filename in file_list: 48 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 49 | 50 | file_path = os.path.join(args.dataset_dir, filename) 51 | df = pd.read_csv(file_path).dropna() 52 | data = df.iloc[:, 0:-1].values.astype(float) 53 | label = df['Label'].astype(int).to_numpy() 54 | # print('data: ', data.shape) 55 | # print('label: ', label.shape) 56 | 57 | feats = data.shape[1] 58 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 59 | train_index = filename.split('.')[0].split('_')[-3] 60 | data_train = data[:int(train_index), :] 61 | 62 | for params in combinations: 63 | 64 | if args.AD_Name in Semisupervise_AD_Pool: 65 | output = run_Semisupervise_AD(args.AD_Name, 
data_train, data, **params) 66 | elif args.AD_Name in Unsupervise_AD_Pool: 67 | output = run_Unsupervise_AD(args.AD_Name, data, **params) 68 | else: 69 | raise Exception(f"{args.AD_Name} is not defined") 70 | 71 | try: 72 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow) 73 | print('evaluation_result: ', evaluation_result) 74 | list_w = list(evaluation_result.values()) 75 | except: 76 | list_w = [0]*9 77 | list_w.insert(0, params) 78 | list_w.insert(0, filename) 79 | write_csv.append(list_w) 80 | 81 | ## Temp Save 82 | col_w = list(evaluation_result.keys()) 83 | col_w.insert(0, 'HP') 84 | col_w.insert(0, 'file') 85 | w_csv = pd.DataFrame(write_csv, columns=col_w) 86 | 87 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/HP_Tuning_U.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os 9 | import itertools 10 | from TSB_AD.evaluation.metrics import get_metrics 11 | from TSB_AD.utils.slidingWindows import find_length_rank 12 | from TSB_AD.model_wrapper import * 13 | from TSB_AD.HP_list import Uni_algo_HP_dict 14 | 15 | # seeding 16 | seed = 2024 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | np.random.seed(seed) 21 | random.seed(seed) 22 | torch.backends.cudnn.benchmark = False 23 | torch.backends.cudnn.deterministic = True 24 | 25 | print("CUDA available: ", torch.cuda.is_available()) 26 | print("cuDNN version: ", torch.backends.cudnn.version()) 27 | 28 | if __name__ == '__main__': 29 | 30 | Start_T = time.time() 31 | ## ArgumentParser 32 | parser = argparse.ArgumentParser(description='HP Tuning') 33 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-U/') 34 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-U-Tuning.csv') 35 | parser.add_argument('--save_dir', type=str, default='eval/HP_tuning/uni/') 36 | parser.add_argument('--AD_Name', type=str, default='IForest') 37 | args = parser.parse_args() 38 | 39 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 40 | 41 | Det_HP = Uni_algo_HP_dict[args.AD_Name] 42 | 43 | keys, values = zip(*Det_HP.items()) 44 | combinations = [dict(zip(keys, v)) for v in itertools.product(*values)] 45 | 46 | write_csv = [] 47 | for filename in file_list: 48 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 49 | 50 | file_path = os.path.join(args.dataset_dir, filename) 51 | df = pd.read_csv(file_path).dropna() 52 | data = df.iloc[:, 0:-1].values.astype(float) 53 | label = df['Label'].astype(int).to_numpy() 54 | # print('data: ', data.shape) 55 | # print('label: ', label.shape) 56 | 57 | feats = data.shape[1] 58 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 59 | train_index = filename.split('.')[0].split('_')[-3] 60 | data_train = data[:int(train_index), :] 61 | 62 | for params in combinations: 63 | 64 | if args.AD_Name in Semisupervise_AD_Pool: 65 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **params) 66 | elif args.AD_Name in Unsupervise_AD_Pool: 67 | output = run_Unsupervise_AD(args.AD_Name, data, **params) 68 | else: 69 | raise Exception(f"{args.AD_Name} is not defined") 70 | 71 | try: 72 | evaluation_result = 
get_metrics(output, label, slidingWindow=slidingWindow) 73 | print('evaluation_result: ', evaluation_result) 74 | list_w = list(evaluation_result.values()) 75 | except: 76 | list_w = [0]*9 77 | list_w.insert(0, params) 78 | list_w.insert(0, filename) 79 | write_csv.append(list_w) 80 | 81 | ## Temp Save 82 | col_w = list(evaluation_result.keys()) 83 | col_w.insert(0, 'HP') 84 | col_w.insert(0, 'file') 85 | w_csv = pd.DataFrame(write_csv, columns=col_w) 86 | 87 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/README.md: -------------------------------------------------------------------------------- 1 | ### Scripts for running experiments/develop new methods in TSB-AD 2 | 3 | * Hper-parameter Tuning: HP_Tuning_U/M.py 4 | 5 | * Benchmark Evaluation: Run_Detector_U/M.py 6 | 7 | * `benchmark_eval_results/`: Evaluation results of anomaly detectors across different time series in TSB-AD 8 | * All time series are normalized by z-score by default 9 | 10 | * Develop your own algorithm: Run_Custom_Detector.py 11 | * Step 1: Implement `Custom_AD` class 12 | * Step 2: Implement model wrapper function `run_Custom_AD_Unsupervised` or `run_Custom_AD_Semisupervised` 13 | * Step 3: Specify `Custom_AD_HP` hyperparameter dict 14 | * Step 4: Run the custom algorithm either `run_Custom_AD_Unsupervised` or `run_Custom_AD_Semisupervised` 15 | * Step 5: Apply threshold to the anomaly score (if any) 16 | 17 | 🪧 How to commit your own algorithm to TSB-AD: you can send us the Run_Custom_Detector.py (replace Custom_Detector with the model name) to us via (i) [email](liu.11085@osu.edu) or (ii) open a pull request and add the file to `benchmark_exp` folder in `TSB-AD-algo` branch. We will test and evaluate the algorithm and include it in our [leaderboard](https://thedatumorg.github.io/TSB-AD/). -------------------------------------------------------------------------------- /benchmark_exp/Run_Custom_Detector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from sklearn.preprocessing import MinMaxScaler 10 | 11 | from TSB_AD.evaluation.metrics import get_metrics 12 | from TSB_AD.utils.slidingWindows import find_length_rank 13 | from TSB_AD.models.base import BaseDetector 14 | from TSB_AD.utils.utility import zscore 15 | 16 | class Custom_AD(BaseDetector): 17 | 18 | def __init__(self, HP, normalize=True): 19 | super().__init__() 20 | self.HP = HP 21 | self.normalize = normalize 22 | 23 | def fit(self, X, y=None): 24 | """Fit detector. y is ignored in unsupervised methods. 25 | 26 | Parameters 27 | ---------- 28 | X : numpy array of shape (n_samples, n_features) 29 | The input samples. 30 | 31 | y : Ignored 32 | Not used, present for API consistency by convention. 33 | 34 | Returns 35 | ------- 36 | self : object 37 | Fitted estimator. 38 | """ 39 | n_samples, n_features = X.shape 40 | if self.normalize: X = zscore(X, axis=1, ddof=1) 41 | 42 | self.decision_scores_ = np.zeros(n_samples) 43 | return self 44 | 45 | def decision_function(self, X): 46 | """Predict raw anomaly score of X using the fitted detector. 47 | 48 | The anomaly score of an input sample is computed based on different 49 | detector algorithms. 
For consistency, outliers are assigned with 50 | larger anomaly scores. 51 | 52 | Parameters 53 | ---------- 54 | X : numpy array of shape (n_samples, n_features) 55 | The training input samples. Sparse matrices are accepted only 56 | if they are supported by the base estimator. 57 | 58 | Returns 59 | ------- 60 | anomaly_scores : numpy array of shape (n_samples,) 61 | The anomaly score of the input samples. 62 | """ 63 | n_samples, n_features = X.shape 64 | decision_scores_ = np.zeros(n_samples) 65 | return decision_scores_ 66 | 67 | 68 | def run_Custom_AD_Unsupervised(data, HP): 69 | clf = Custom_AD(HP=HP) 70 | clf.fit(data) 71 | score = clf.decision_scores_ 72 | score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel() 73 | return score 74 | 75 | def run_Custom_AD_Semisupervised(data_train, data_test, HP): 76 | clf = Custom_AD(HP=HP) 77 | clf.fit(data_train) 78 | score = clf.decision_function(data_test) 79 | score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel() 80 | return score 81 | 82 | if __name__ == '__main__': 83 | 84 | Start_T = time.time() 85 | ## ArgumentParser 86 | parser = argparse.ArgumentParser(description='Running Custom_AD') 87 | parser.add_argument('--filename', type=str, default='001_NAB_id_1_Facility_tr_1007_1st_2014.csv') 88 | parser.add_argument('--data_direc', type=str, default='../Datasets/TSB-AD-U/') 89 | parser.add_argument('--AD_Name', type=str, default='Custom_AD') 90 | args = parser.parse_args() 91 | 92 | Custom_AD_HP = { 93 | 'HP': ['HP'], 94 | } 95 | 96 | df = pd.read_csv(args.data_direc + args.filename).dropna() 97 | data = df.iloc[:, 0:-1].values.astype(float) 98 | label = df['Label'].astype(int).to_numpy() 99 | print('data: ', data.shape) 100 | print('label: ', label.shape) 101 | 102 | slidingWindow = find_length_rank(data, rank=1) 103 | train_index = args.filename.split('.')[0].split('_')[-3] 104 | data_train = data[:int(train_index), :] 105 | 106 | start_time = time.time() 107 | 108 | output = run_Custom_AD_Semisupervised(data_train, data, **Custom_AD_HP) 109 | # output = run_Custom_AD_Unsupervised(data, **Custom_AD_HP) 110 | 111 | end_time = time.time() 112 | run_time = end_time - start_time 113 | 114 | pred = output > (np.mean(output)+3*np.std(output)) 115 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow, pred=pred) 116 | print('Evaluation Result: ', evaluation_result) -------------------------------------------------------------------------------- /benchmark_exp/Run_Detector_M.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from TSB_AD.evaluation.metrics import get_metrics 10 | from TSB_AD.utils.slidingWindows import find_length_rank 11 | from TSB_AD.model_wrapper import * 12 | from TSB_AD.HP_list import Optimal_Multi_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA available: ", torch.cuda.is_available()) 25 | print("cuDNN version: ", torch.backends.cudnn.version()) 26 | 27 | if __name__ == '__main__': 28 | 29 | Start_T = time.time() 30 | ## ArgumentParser 31 | parser 
= argparse.ArgumentParser(description='Generating Anomaly Score') 32 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-M/') 33 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-M-Eva.csv') 34 | parser.add_argument('--score_dir', type=str, default='eval/score/multi/') 35 | parser.add_argument('--save_dir', type=str, default='eval/metrics/multi/') 36 | parser.add_argument('--save', type=bool, default=False) 37 | parser.add_argument('--AD_Name', type=str, default='IForest') 38 | args = parser.parse_args() 39 | 40 | 41 | target_dir = os.path.join(args.score_dir, args.AD_Name) 42 | os.makedirs(target_dir, exist_ok = True) 43 | logging.basicConfig(filename=f'{target_dir}/000_run_{args.AD_Name}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 44 | 45 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 46 | Optimal_Det_HP = Optimal_Multi_algo_HP_dict[args.AD_Name] 47 | print('Optimal_Det_HP: ', Optimal_Det_HP) 48 | 49 | write_csv = [] 50 | for filename in file_list: 51 | if os.path.exists(target_dir+'/'+filename.split('.')[0]+'.npy'): continue 52 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 53 | 54 | file_path = os.path.join(args.dataset_dir, filename) 55 | df = pd.read_csv(file_path).dropna() 56 | data = df.iloc[:, 0:-1].values.astype(float) 57 | label = df['Label'].astype(int).to_numpy() 58 | # print('data: ', data.shape) 59 | # print('label: ', label.shape) 60 | 61 | feats = data.shape[1] 62 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 63 | train_index = filename.split('.')[0].split('_')[-3] 64 | data_train = data[:int(train_index), :] 65 | 66 | start_time = time.time() 67 | 68 | if args.AD_Name in Semisupervise_AD_Pool: 69 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 70 | elif args.AD_Name in Unsupervise_AD_Pool: 71 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 72 | else: 73 | raise Exception(f"{args.AD_Name} is not defined") 74 | 75 | end_time = time.time() 76 | run_time = end_time - start_time 77 | 78 | if isinstance(output, np.ndarray): 79 | logging.info(f'Success at {filename} using {args.AD_Name} | Time cost: {run_time:.3f}s at length {len(label)}') 80 | np.save(target_dir+'/'+filename.split('.')[0]+'.npy', output) 81 | else: 82 | logging.error(f'At {filename}: '+output) 83 | 84 | ### whether to save the evaluation result 85 | if args.save: 86 | try: 87 | evaluation_result = get_metrics(output, label, metric='all', slidingWindow=slidingWindow) 88 | print('evaluation_result: ', evaluation_result) 89 | list_w = list(evaluation_result.values()) 90 | except: 91 | list_w = [0]*9 92 | list_w.insert(0, run_time) 93 | list_w.insert(0, filename) 94 | write_csv.append(list_w) 95 | 96 | ## Temp Save 97 | col_w = list(evaluation_result.keys()) 98 | col_w.insert(0, 'Time') 99 | col_w.insert(0, 'file') 100 | w_csv = pd.DataFrame(write_csv, columns=col_w) 101 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/Run_Detector_U.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from TSB_AD.evaluation.metrics import get_metrics 10 | from TSB_AD.utils.slidingWindows 
import find_length_rank 11 | from TSB_AD.model_wrapper import * 12 | from TSB_AD.HP_list import Optimal_Uni_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA available: ", torch.cuda.is_available()) 25 | print("cuDNN version: ", torch.backends.cudnn.version()) 26 | 27 | if __name__ == '__main__': 28 | 29 | Start_T = time.time() 30 | ## ArgumentParser 31 | parser = argparse.ArgumentParser(description='Generating Anomaly Score') 32 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-U/') 33 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-U-Eva.csv') 34 | parser.add_argument('--score_dir', type=str, default='eval/score/uni/') 35 | parser.add_argument('--save_dir', type=str, default='eval/metrics/uni/') 36 | parser.add_argument('--save', type=bool, default=False) 37 | parser.add_argument('--AD_Name', type=str, default='IForest') 38 | args = parser.parse_args() 39 | 40 | 41 | target_dir = os.path.join(args.score_dir, args.AD_Name) 42 | os.makedirs(target_dir, exist_ok = True) 43 | logging.basicConfig(filename=f'{target_dir}/000_run_{args.AD_Name}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 44 | 45 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 46 | Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name] 47 | print('Optimal_Det_HP: ', Optimal_Det_HP) 48 | 49 | write_csv = [] 50 | for filename in file_list: 51 | if os.path.exists(target_dir+'/'+filename.split('.')[0]+'.npy'): continue 52 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 53 | 54 | file_path = os.path.join(args.dataset_dir, filename) 55 | df = pd.read_csv(file_path).dropna() 56 | data = df.iloc[:, 0:-1].values.astype(float) 57 | label = df['Label'].astype(int).to_numpy() 58 | # print('data: ', data.shape) 59 | # print('label: ', label.shape) 60 | 61 | feats = data.shape[1] 62 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 63 | train_index = filename.split('.')[0].split('_')[-3] 64 | data_train = data[:int(train_index), :] 65 | 66 | start_time = time.time() 67 | 68 | if args.AD_Name in Semisupervise_AD_Pool: 69 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 70 | elif args.AD_Name in Unsupervise_AD_Pool: 71 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 72 | else: 73 | raise Exception(f"{args.AD_Name} is not defined") 74 | 75 | end_time = time.time() 76 | run_time = end_time - start_time 77 | 78 | if isinstance(output, np.ndarray): 79 | logging.info(f'Success at {filename} using {args.AD_Name} | Time cost: {run_time:.3f}s at length {len(label)}') 80 | np.save(target_dir+'/'+filename.split('.')[0]+'.npy', output) 81 | else: 82 | logging.error(f'At {filename}: '+output) 83 | 84 | ### whether to save the evaluation result 85 | if args.save: 86 | try: 87 | evaluation_result = get_metrics(output, label, metric='all', slidingWindow=slidingWindow) 88 | print('evaluation_result: ', evaluation_result) 89 | list_w = list(evaluation_result.values()) 90 | except: 91 | list_w = [0]*9 92 | list_w.insert(0, run_time) 93 | list_w.insert(0, filename) 94 | write_csv.append(list_w) 95 | 96 | ## Temp Save 97 | col_w = list(evaluation_result.keys()) 98 | col_w.insert(0, 'Time') 99 | col_w.insert(0, 
'file') 100 | w_csv = pd.DataFrame(write_csv, columns=col_w) 101 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/benchmark_eval_results/README.md: -------------------------------------------------------------------------------- 1 | Evaluation results computed based on the lasted version of [VUS](https://arxiv.org/abs/2502.13318). 2 | 3 | The implemention in [`get_metrics`](https://github.com/TheDatumOrg/TSB-AD/blob/dce9e5e5ec14ffb82787315dbf1a8564c7bb9f47/TSB_AD/evaluation/metrics.py#L3) is up-to-date. -------------------------------------------------------------------------------- /docs/static/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/.DS_Store -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 
1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | padding: 20px; 121 | font-size: 0; 122 | } 123 | 124 | .results-carousel 
video { 125 | margin: 0; 126 | } 127 | 128 | .slider-pagination .slider-page { 129 | background: #000000; 130 | } 131 | 132 | .eql-cntrb { 133 | font-size: smaller; 134 | } 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /docs/static/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/images/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/tsb_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/images/tsb_overview.png -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | 4 | $(document).ready(function() { 5 | // Check for click events on the navbar burger icon 6 | 7 | var options = { 8 | slidesToScroll: 1, 9 | slidesToShow: 1, 10 | loop: true, 11 | infinite: true, 12 | autoplay: true, 13 | autoplaySpeed: 5000, 14 | } 15 | 16 | // Initialize all div with carousel class 17 | var carousels = bulmaCarousel.attach('.carousel', options); 18 | 19 | bulmaSlider.attach(); 20 | 21 | }) 22 | -------------------------------------------------------------------------------- /docs/static/leaderboard/TSB-AD-M.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Leaderboard 8 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 |
| Method | AUC-PR | AUC-ROC | VUS-PR | VUS-ROC | Standard-F1 | PA-F1 | Event-based-F1 | R-based-F1 | Affiliation-F1 |
|---|---|---|---|---|---|---|---|---|---|
| 🥇 CNN | 0.32 | 0.73 | 0.31 | 0.76 | 0.37 | 0.78 | 0.65 | 0.37 | 0.87 |
| 🥈 OmniAnomaly | 0.27 | 0.65 | 0.31 | 0.69 | 0.32 | 0.55 | 0.41 | 0.37 | 0.81 |
| 🥉 PCA | 0.31 | 0.70 | 0.31 | 0.74 | 0.37 | 0.79 | 0.59 | 0.29 | 0.85 |
| LSTMAD | 0.31 | 0.70 | 0.31 | 0.74 | 0.36 | 0.79 | 0.64 | 0.38 | 0.87 |
| USAD | 0.26 | 0.64 | 0.30 | 0.68 | 0.31 | 0.53 | 0.40 | 0.37 | 0.80 |
| AutoEncoder | 0.30 | 0.67 | 0.30 | 0.69 | 0.34 | 0.60 | 0.44 | 0.28 | 0.80 |
| KMeansAD | 0.25 | 0.69 | 0.29 | 0.73 | 0.31 | 0.68 | 0.49 | 0.33 | 0.82 |
| CBLOF | 0.28 | 0.67 | 0.27 | 0.70 | 0.32 | 0.65 | 0.45 | 0.31 | 0.81 |
| MCD | 0.27 | 0.65 | 0.27 | 0.69 | 0.33 | 0.46 | 0.33 | 0.20 | 0.76 |
| OCSVM | 0.23 | 0.61 | 0.26 | 0.67 | 0.28 | 0.48 | 0.41 | 0.30 | 0.80 |
| Donut | 0.20 | 0.64 | 0.26 | 0.71 | 0.28 | 0.52 | 0.36 | 0.21 | 0.81 |
| RobustPCA | 0.24 | 0.58 | 0.24 | 0.61 | 0.29 | 0.60 | 0.42 | 0.33 | 0.81 |
| FITS | 0.15 | 0.58 | 0.21 | 0.66 | 0.22 | 0.72 | 0.32 | 0.16 | 0.81 |
| OFA | 0.15 | 0.55 | 0.21 | 0.63 | 0.21 | 0.72 | 0.41 | 0.17 | 0.83 |
| EIF | 0.19 | 0.67 | 0.21 | 0.71 | 0.26 | 0.74 | 0.44 | 0.26 | 0.81 |
| COPOD | 0.20 | 0.65 | 0.20 | 0.69 | 0.27 | 0.72 | 0.41 | 0.24 | 0.80 |
| IForest | 0.19 | 0.66 | 0.20 | 0.69 | 0.26 | 0.68 | 0.41 | 0.24 | 0.80 |
| HBOS | 0.16 | 0.63 | 0.19 | 0.67 | 0.24 | 0.67 | 0.40 | 0.24 | 0.80 |
| TimesNet | 0.13 | 0.56 | 0.19 | 0.64 | 0.20 | 0.68 | 0.32 | 0.17 | 0.82 |
| KNN | 0.14 | 0.51 | 0.18 | 0.59 | 0.19 | 0.69 | 0.45 | 0.21 | 0.79 |
| TranAD | 0.14 | 0.59 | 0.18 | 0.65 | 0.21 | 0.68 | 0.40 | 0.21 | 0.79 |
| LOF | 0.10 | 0.53 | 0.14 | 0.60 | 0.15 | 0.57 | 0.32 | 0.14 | 0.76 |
| AnomalyTransformer | 0.07 | 0.52 | 0.12 | 0.57 | 0.12 | 0.53 | 0.33 | 0.14 | 0.74 |
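The table above appears to be ranked by mean VUS-PR over the TSB-AD-M evaluation set. A rough pandas sketch of that aggregation is shown below; it assumes a merged results table in the spirit of the CSVs under `benchmark_exp/benchmark_eval_results/`, laid out with one row per evaluated file, a `file` identifier column, and one VUS-PR column per detector. The file name and column names here are assumptions, not verified against the shipped CSVs.

```python
import pandas as pd

# Hypothetical layout: one row per evaluated time series, one column per detector
# holding that detector's VUS-PR on the series, plus a 'file' identifier column.
merged = pd.read_csv('benchmark_exp/benchmark_eval_results/multi_mergedTable_VUS-PR.csv')
score_cols = [c for c in merged.columns if c != 'file']            # assumed id column name
ranking = merged[score_cols].mean(numeric_only=True).sort_values(ascending=False)
print(ranking.round(2))                                            # leaderboard-style ordering by mean VUS-PR
```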
-------------------------------------------------------------------------------- /docs/static/leaderboard/TSB-AD-U.html: --------------------------------------------------------------------------------
Leaderboard
| Method | AUC-PR | AUC-ROC | VUS-PR | VUS-ROC | Standard-F1 | PA-F1 | Event-based-F1 | R-based-F1 | Affiliation-F1 |
|---|---|---|---|---|---|---|---|---|---|
| 🥇 Sub-PCA | 0.37 | 0.71 | 0.42 | 0.76 | 0.42 | 0.56 | 0.49 | 0.41 | 0.85 |
| 🥈 KShapeAD | 0.35 | 0.74 | 0.40 | 0.76 | 0.39 | 0.58 | 0.46 | 0.40 | 0.83 |
| 🥉 POLY | 0.31 | 0.73 | 0.39 | 0.76 | 0.37 | 0.53 | 0.45 | 0.35 | 0.85 |
| Series2Graph | 0.33 | 0.76 | 0.39 | 0.80 | 0.38 | 0.65 | 0.50 | 0.35 | 0.85 |
| MOMENT (FT) | 0.30 | 0.69 | 0.39 | 0.76 | 0.35 | 0.65 | 0.49 | 0.35 | 0.86 |
| MOMENT (ZS) | 0.30 | 0.68 | 0.38 | 0.75 | 0.35 | 0.61 | 0.49 | 0.36 | 0.86 |
| KMeansAD | 0.32 | 0.74 | 0.37 | 0.76 | 0.37 | 0.56 | 0.44 | 0.38 | 0.82 |
| USAD | 0.32 | 0.66 | 0.36 | 0.71 | 0.37 | 0.50 | 0.43 | 0.40 | 0.84 |
| Sub-KNN | 0.27 | 0.76 | 0.35 | 0.79 | 0.34 | 0.61 | 0.43 | 0.32 | 0.84 |
| MatrixProfile | 0.26 | 0.73 | 0.35 | 0.76 | 0.33 | 0.63 | 0.44 | 0.32 | 0.84 |
| SAND | 0.29 | 0.73 | 0.34 | 0.76 | 0.35 | 0.56 | 0.42 | 0.36 | 0.81 |
| CNN | 0.33 | 0.71 | 0.34 | 0.79 | 0.38 | 0.78 | 0.66 | 0.35 | 0.88 |
| LSTMAD | 0.31 | 0.68 | 0.33 | 0.76 | 0.37 | 0.71 | 0.59 | 0.34 | 0.86 |
| SR | 0.32 | 0.74 | 0.32 | 0.81 | 0.38 | 0.87 | 0.67 | 0.35 | 0.89 |
| TimesFM | 0.28 | 0.67 | 0.30 | 0.74 | 0.34 | 0.84 | 0.63 | 0.34 | 0.89 |
| IForest | 0.29 | 0.71 | 0.30 | 0.78 | 0.35 | 0.73 | 0.56 | 0.30 | 0.84 |
| OmniAnomaly | 0.27 | 0.65 | 0.29 | 0.72 | 0.31 | 0.59 | 0.46 | 0.29 | 0.83 |
| Lag-Llama | 0.25 | 0.65 | 0.27 | 0.72 | 0.30 | 0.77 | 0.59 | 0.31 | 0.88 |
| Chronos | 0.26 | 0.66 | 0.27 | 0.73 | 0.32 | 0.83 | 0.61 | 0.33 | 0.88 |
| TimesNet | 0.18 | 0.61 | 0.26 | 0.72 | 0.24 | 0.67 | 0.47 | 0.21 | 0.86 |
| AutoEncoder | 0.19 | 0.63 | 0.26 | 0.69 | 0.25 | 0.54 | 0.36 | 0.28 | 0.82 |
| TranAD | 0.20 | 0.57 | 0.26 | 0.68 | 0.25 | 0.58 | 0.43 | 0.25 | 0.83 |
| FITS | 0.17 | 0.61 | 0.26 | 0.73 | 0.23 | 0.65 | 0.42 | 0.20 | 0.86 |
| Sub-LOF | 0.16 | 0.68 | 0.25 | 0.73 | 0.24 | 0.57 | 0.35 | 0.25 | 0.82 |
| OFA | 0.16 | 0.59 | 0.24 | 0.71 | 0.22 | 0.67 | 0.45 | 0.20 | 0.86 |
| Sub-MCD | 0.15 | 0.67 | 0.24 | 0.72 | 0.23 | 0.54 | 0.32 | 0.24 | 0.81 |
| Sub-HBOS | 0.18 | 0.61 | 0.23 | 0.67 | 0.23 | 0.60 | 0.35 | 0.27 | 0.79 |
| Sub-OCSVM | 0.16 | 0.65 | 0.23 | 0.73 | 0.22 | 0.55 | 0.32 | 0.23 | 0.79 |
| Sub-IForest | 0.16 | 0.63 | 0.22 | 0.72 | 0.22 | 0.63 | 0.34 | 0.23 | 0.80 |
| Donut | 0.14 | 0.56 | 0.20 | 0.68 | 0.20 | 0.57 | 0.38 | 0.20 | 0.82 |
| LOF | 0.14 | 0.58 | 0.17 | 0.68 | 0.21 | 0.63 | 0.40 | 0.22 | 0.79 |
| AnomalyTransformer | 0.08 | 0.50 | 0.12 | 0.56 | 0.12 | 0.53 | 0.34 | 0.14 | 0.77 |
-------------------------------------------------------------------------------- /docs/static/pdfs/TSB-AD-NeurIPS24.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/pdfs/TSB-AD-NeurIPS24.pdf
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | tqdm
2 | torchinfo
3 | h5py
4 | einops
5 | numpy>=1.24.3,<2.0
6 | matplotlib>=3.7.5
7 | pandas>=2.0.3
8 | arch>=5.3.1
9 | hurst>=0.0.5
10 | tslearn>=0.6.3
11 | cython>=3.0.10
12 | scikit-learn>=1.3.2
13 | stumpy>=1.12.0
14 | networkx>=3.1
15 | transformers>=4.38.0
16 | torch==2.3.0
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from io import open
3 | 
4 | setup(
5 |     name='TSB_AD', # Replace with your own package name
6 |     version='1.5', # The version of your package
7 |     author='The Datum Lab', # Your name
8 |     description='Time-Series Anomaly Detection Benchmark', # A short description
9 |     long_description=open('README.md', encoding='utf-8').read(), # Long description read from the README.md
10 |     long_description_content_type='text/markdown', # Type of the long description, typically text/markdown or text/x-rst
11 |     url='https://github.com/TheDatumOrg/TSB-AD', # Link to the repository or website
12 |     packages=find_packages(), # List of all Python import packages that should be included in the Distribution Package
13 |     classifiers=[
14 |         # Trove classifiers
15 |         # Full list: https://pypi.org/classifiers/
16 |         'Development Status :: 3 - Alpha',
17 |         'Intended Audience :: Developers',
18 |         'Topic :: Software Development :: Build Tools',
19 |         'License :: OSI Approved :: Apache Software License',
20 |         'Programming Language :: Python :: 3.9',
21 |         'Programming Language :: Python :: 3.10',
22 |         'Programming Language :: Python :: 3.11',
23 |     ],
24 |     install_requires=[
25 |         'tqdm',
26 |         'torchinfo',
27 |         'h5py',
28 |         'einops',
29 |         'numpy>=1.24.3,<2.0',
30 |         'matplotlib>=3.7.5',
31 |         'pandas>=2.0.3',
32 |         'arch>=5.3.1',
33 |         'hurst>=0.0.5',
34 |         'tslearn>=0.6.3',
35 |         'cython>=3.0.10',
36 |         'scikit-learn>=1.3.2',
37 |         'stumpy>=1.12.0',
38 |         'networkx>=3.1',
39 |         'transformers>=4.38.0',
40 |         'torch>=1.8.0',
41 |     ],
42 |     python_requires='>=3.8', # Minimum version requirement of the package
43 |     entry_points={},
44 |     license="Apache-2.0 license",
45 |     include_package_data=True, # Whether to include non-code files in the package
46 |     zip_safe=False, # Whether the package can be run out of a zip file
47 | )
--------------------------------------------------------------------------------
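With the dependencies above installed and the package set up (for example, `pip install -e .` from the repository root using the setup.py above), a quick smoke test can confirm that the model wrapper is importable. The snippet below is a minimal sketch rather than an official entry point; it assumes `run_Unsupervise_AD` accepts a 2-D array as in the benchmark script earlier and that `'IForest'` runs with default hyperparameters when none are passed.

```python
# Minimal post-install smoke test; hyperparameters are left at their defaults here.
import numpy as np
from TSB_AD.model_wrapper import run_Unsupervise_AD

rng = np.random.default_rng(2024)
series = rng.normal(size=(2000, 1))        # synthetic univariate series, shaped like the benchmark inputs
series[1500:1520, 0] += 6.0                # inject an obvious level shift as a stand-in anomaly
score = run_Unsupervise_AD('IForest', series)
print(type(score), getattr(score, 'shape', None))
```

A NumPy array of per-point scores indicates the wrapper and its dependencies are working; a returned string indicates the detector failed and carries the error message, mirroring the success/failure handling in the benchmark script.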