├── checkpoints
│   └── BieqAbdL_checkpoint.pt
├── model_sinne
│   ├── utils.py
│   └── SiNNE.py
├── data_od_evaluation
│   ├── wbc_pca_gt_hbos.csv
│   ├── wbc_pca_gt_iforest.csv
│   ├── vertebral_gt_hbos.csv
│   ├── wbc_pca_gt_copod.csv
│   ├── vertebral_gt_iforest.csv
│   ├── vertebral_gt_copod.csv
│   ├── speech_pca_gt_hbos.csv
│   ├── SPECT_pca_gt_hbos.csv
│   ├── arrhythmia_pca_gt_hbos.csv
│   ├── SPECT_pca_gt_iforest.csv
│   ├── SPECT_pca_gt_copod.csv
│   ├── satimage-2_pca_gt_hbos.csv
│   ├── wineQualityReds-od2_gt_hbos.csv
│   ├── arrhythmia_pca_gt_iforest.csv
│   ├── arrhythmia_pca_gt_copod.csv
│   ├── wineQualityReds-od2_gt_iforest.csv
│   ├── speech_pca_gt_iforest.csv
│   ├── speech_pca_gt_copod.csv
│   ├── wineQualityReds-od2_gt_copod.csv
│   ├── satimage-2_pca_gt_iforest.csv
│   ├── satimage-2_pca_gt_copod.csv
│   ├── letter_pca_gt_hbos.csv
│   ├── ionosphere_pca_gt_hbos.csv
│   ├── letter_pca_gt_copod.csv
│   ├── letter_pca_gt_iforest.csv
│   ├── ionosphere_pca_gt_iforest.csv
│   ├── optdigits_pca_gt_hbos.csv
│   ├── ionosphere_pca_gt_copod.csv
│   ├── wineQualityWhites-od2_gt_hbos.csv
│   ├── wineQualityWhites-od2_gt_iforest.csv
│   ├── optdigits_pca_gt_copod.csv
│   ├── optdigits_pca_gt_iforest.csv
│   ├── wineQualityWhites-od2_gt_copod.csv
│   ├── pima_gt_hbos.csv
│   ├── pima_gt_iforest.csv
│   └── pima_gt_copod.csv
├── utils
│   ├── eval_print_utils.py
│   ├── model_utils.py
│   └── synthetic_generator.py
├── eval
│   ├── evaluation_utils.py
│   ├── eva_main.py
│   └── evaluation_od.py
├── model_coin
│   ├── utils.py
│   ├── prediction_strength.py
│   └── COIN.py
├── model_iml
│   ├── LIME.py
│   ├── Anchor.py
│   ├── IntGrad.py
│   └── SHAP.py
├── model_aton
│   ├── utils.py
│   ├── ATON_ablation.py
│   ├── networks.py
│   ├── ATON_ablation2.py
│   ├── ATON.py
│   ├── ATON_ablation3.py
│   └── datasets.py
├── config.py
├── main2.py
├── README.md
├── main.py
├── data
│   └── 01-vertebral.csv
└── LICENSE
/checkpoints/BieqAbdL_checkpoint.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuhongzuo/outlier-interpretation/HEAD/checkpoints/BieqAbdL_checkpoint.pt -------------------------------------------------------------------------------- /model_sinne/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def powerset(s): 5 | x = len(s) 6 | pw_set = [] 7 | for i in range(1 << x): 8 | pw_set.append([s[j] for j in range(x) if (i & (1 << j))]) 9 | return pw_set 10 | 11 | -------------------------------------------------------------------------------- /data_od_evaluation/wbc_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 357,"[2, 3]" 3 | 358,"[1, 4]" 4 | 359,"[1, 2, 4]" 5 | 360,[5] 6 | 361,"[0, 2]" 7 | 362,[1] 8 | 363,"[2, 3, 5]" 9 | 364,"[1, 2, 4, 6]" 10 | 365,"[0, 5, 7, 8]" 11 | 366,[0] 12 | 367,[0] 13 | 368,[4] 14 | 369,[6] 15 | 370,"[0, 2, 3]" 16 | 371,"[0, 1, 3, 7]" 17 | 372,"[0, 1]" 18 | 373,"[3, 4, 5, 9]" 19 | 374,"[2, 3]" 20 | 375,[0] 21 | 376,"[0, 2, 6]" 22 | 377,[0] 23 | -------------------------------------------------------------------------------- /data_od_evaluation/wbc_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 357,[3] 3 | 358,"[1, 5]" 4 | 359,"[2, 5, 8]" 5 | 360,[5] 6 | 361,"[1, 2]" 7 | 362,"[1, 4, 5, 7, 9]" 8 | 363,"[5, 8]" 9 | 364,[1] 10 | 365,"[1, 2, 7, 8]" 11 | 366,"[0, 1, 5]" 12 | 367,"[0, 2]" 13 | 368,[4] 14 | 369,"[2, 6, 8]" 15 | 370,"[0, 1, 3, 5]" 16 | 371,[1] 17 | 372,"[1, 4, 7, 9]" 18 | 373,"[3, 4, 5, 8]" 19 | 374,[3] 20 | 375,[0] 21 | 376,"[0, 2]" 22 | 377,"[0, 2, 3, 5, 7]" 23 | 
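A note on the bit-mask trick in model_sinne/utils.py above: each integer i in [0, 2**len(s)) acts as an inclusion mask, and bit j of i decides whether s[j] joins the subset, so the loop enumerates all 2**len(s) subsets. A minimal usage sketch, assuming the module is importable from the repository root:

from model_sinne.utils import powerset

subsets = powerset([0, 1, 2])
# subsets come out in bitmask order, starting with the empty set
assert subsets[0] == []
assert subsets[-1] == [0, 1, 2]
print(subsets)  # [[], [0], [1], [0, 1], [2], [0, 2], [1, 2], [0, 1, 2]]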
-------------------------------------------------------------------------------- /data_od_evaluation/vertebral_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 210,"[0, 2]" 3 | 211,[4] 4 | 212,[0] 5 | 213,[4] 6 | 214,[4] 7 | 215,[1] 8 | 216,"[2, 3]" 9 | 217,[1] 10 | 218,[4] 11 | 219,[1] 12 | 220,[4] 13 | 221,[4] 14 | 222,[0] 15 | 223,[1] 16 | 224,[1] 17 | 225,[4] 18 | 226,[4] 19 | 227,"[0, 1]" 20 | 228,[4] 21 | 229,[4] 22 | 230,[1] 23 | 231,"[2, 3]" 24 | 232,[1] 25 | 233,[4] 26 | 234,[4] 27 | 235,[0] 28 | 236,[3] 29 | 237,[1] 30 | 238,[4] 31 | 239,[4] 32 | -------------------------------------------------------------------------------- /data_od_evaluation/wbc_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 357,"[2, 6]" 3 | 358,"[0, 1, 4, 5]" 4 | 359,"[2, 8]" 5 | 360,[5] 6 | 361,"[1, 2]" 7 | 362,"[7, 9]" 8 | 363,[8] 9 | 364,"[2, 4, 8, 9]" 10 | 365,"[4, 6, 8]" 11 | 366,"[0, 2, 3, 5]" 12 | 367,"[0, 3, 7]" 13 | 368,"[0, 2, 4, 5]" 14 | 369,"[2, 6]" 15 | 370,"[0, 1, 2, 7, 8]" 16 | 371,"[0, 1, 2, 7, 8]" 17 | 372,"[2, 4, 9]" 18 | 373,"[0, 2, 5]" 19 | 374,"[2, 6]" 20 | 375,[0] 21 | 376,"[0, 2, 4, 6, 7]" 22 | 377,"[0, 2]" 23 | -------------------------------------------------------------------------------- /data_od_evaluation/vertebral_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 210,[0] 3 | 211,[1] 4 | 212,[0] 5 | 213,"[1, 4]" 6 | 214,"[1, 4]" 7 | 215,[1] 8 | 216,"[1, 2, 4]" 9 | 217,"[0, 5]" 10 | 218,"[0, 2]" 11 | 219,[1] 12 | 220,"[1, 2, 4]" 13 | 221,[4] 14 | 222,"[0, 4]" 15 | 223,"[1, 4]" 16 | 224,"[0, 2]" 17 | 225,"[3, 5]" 18 | 226,[5] 19 | 227,[1] 20 | 228,"[0, 2]" 21 | 229,"[0, 4]" 22 | 230,"[1, 3, 5]" 23 | 231,"[2, 4]" 24 | 232,"[1, 2, 3]" 25 | 233,"[3, 5]" 26 | 234,"[1, 4]" 27 | 235,"[3, 5]" 28 | 236,[4] 29 | 237,"[1, 4]" 30 | 238,[5] 31 | 239,"[0, 4]" 32 | -------------------------------------------------------------------------------- /data_od_evaluation/vertebral_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 210,[4] 3 | 211,"[1, 4]" 4 | 212,"[1, 4]" 5 | 213,"[1, 4]" 6 | 214,[4] 7 | 215,"[1, 5]" 8 | 216,"[1, 2, 3, 5]" 9 | 217,"[0, 1]" 10 | 218,[2] 11 | 219,"[1, 5]" 12 | 220,"[0, 1, 2, 4]" 13 | 221,[4] 14 | 222,"[3, 4]" 15 | 223,"[1, 4]" 16 | 224,"[1, 2, 3, 5]" 17 | 225,"[3, 4]" 18 | 226,[4] 19 | 227,"[0, 1, 2, 4]" 20 | 228,"[0, 4]" 21 | 229,"[0, 4]" 22 | 230,"[1, 5]" 23 | 231,"[2, 3, 4]" 24 | 232,"[1, 5]" 25 | 233,[3] 26 | 234,[4] 27 | 235,[5] 28 | 236,"[3, 4]" 29 | 237,"[1, 4]" 30 | 238,"[4, 5]" 31 | 239,"[3, 4]" 32 | -------------------------------------------------------------------------------- /utils/eval_print_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def print_eval_runs2(runs_metric_lst, data_name, algo_name): 5 | runs_metric_lst = np.array(runs_metric_lst) 6 | precision, recall, jaccard, t = np.average(runs_metric_lst, axis=0) 7 | txt = "%s, od_eval, [p,r,j], %.4f, %.4f, %.4f, time, %.2f, %s" % \ 8 | (data_name, precision, recall, jaccard, t, algo_name) 9 | return txt 10 | 11 | 12 | def print_eval_runs(runs_metric_lst, data_name, algo_name): 13 | runs_metric_lst = np.array(runs_metric_lst) 14 | precision, recall, jaccard, aupr, auroc, t = np.average(runs_metric_lst, axis=0) 15 | txt = "%s, [p 
r j aupr auroc], %.4f, %.4f, %.4f, %.4f, %.4f, time, %.2f, %s" % \ 16 | (data_name, precision, recall, jaccard, aupr, auroc, t, algo_name) 17 | return txt 18 | 19 | -------------------------------------------------------------------------------- /eval/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def powerset(s): 5 | x = len(s) 6 | pw_set = [] 7 | for i in range(1 << x): 8 | pw_set.append([s[j] for j in range(x) if (i & (1 << j))]) 9 | return pw_set 10 | 11 | 12 | def min_max_norm(array): 13 | if np.min(array) == np.max(array): 14 | return array * 0 15 | else: 16 | return (array - np.min(array))/(np.max(array) - np.min(array)) 17 | 18 | 19 | def get_subset_candidate(dim, chosen_subspace=None): 20 | if chosen_subspace is not None: 21 | f_subsets = [] 22 | for subset in chosen_subspace: 23 | subset = list(subset) 24 | if subset not in f_subsets: 25 | f_subsets.append(list(subset)) 26 | else: 27 | full_set = np.arange(dim) 28 | f_subsets = powerset(full_set) 29 | f_subsets.remove([]) 30 | return f_subsets 31 | -------------------------------------------------------------------------------- /data_od_evaluation/speech_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,[1] 3 | 1,[7] 4 | 2,[3] 5 | 3,[3] 6 | 4,[3] 7 | 5,[8] 8 | 6,"[0, 1, 2, 4]" 9 | 7,[6] 10 | 8,"[0, 1, 7, 8]" 11 | 9,[1] 12 | 10,"[1, 2, 4, 6]" 13 | 11,[1] 14 | 12,[1] 15 | 13,"[1, 4]" 16 | 14,"[2, 3, 5, 7]" 17 | 15,"[0, 1, 4, 6]" 18 | 16,[1] 19 | 17,[3] 20 | 18,"[1, 8, 9]" 21 | 19,[8] 22 | 20,[2] 23 | 21,[1] 24 | 22,[1] 25 | 23,[1] 26 | 24,[9] 27 | 25,[7] 28 | 26,[1] 29 | 27,[6] 30 | 28,"[1, 2, 3, 9]" 31 | 29,"[1, 4, 7]" 32 | 30,"[2, 3, 4]" 33 | 31,[4] 34 | 32,[1] 35 | 33,"[1, 9]" 36 | 34,"[0, 4, 5, 8]" 37 | 35,"[0, 2, 4, 5, 8, 9]" 38 | 36,"[0, 2]" 39 | 37,[0] 40 | 38,[5] 41 | 39,[7] 42 | 40,[1] 43 | 41,"[2, 5]" 44 | 42,[6] 45 | 43,[2] 46 | 44,"[2, 4, 5, 6, 7]" 47 | 45,[2] 48 | 46,[4] 49 | 47,"[3, 8]" 50 | 48,[2] 51 | 49,"[0, 2, 5, 6, 9]" 52 | 50,"[0, 2, 4, 9]" 53 | 51,[2] 54 | 52,"[2, 3, 5]" 55 | 53,[2] 56 | 54,"[3, 6]" 57 | 55,[7] 58 | 56,[1] 59 | 57,"[0, 4, 6, 7, 8]" 60 | 58,[8] 61 | 59,"[0, 1, 2, 6, 7, 8]" 62 | 60,"[1, 4, 6, 9]" 63 | -------------------------------------------------------------------------------- /data_od_evaluation/SPECT_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 40,"[1, 2, 4, 6]" 3 | 41,"[1, 7]" 4 | 42,"[2, 3, 4, 7, 8]" 5 | 43,"[2, 3, 8, 9]" 6 | 44,"[2, 5]" 7 | 45,[6] 8 | 46,"[0, 4, 6, 7, 9]" 9 | 47,"[1, 2, 4, 6, 8]" 10 | 48,[0] 11 | 49,"[4, 9]" 12 | 50,"[2, 3, 8]" 13 | 51,[0] 14 | 52,[0] 15 | 53,"[0, 1, 3, 6]" 16 | 54,"[1, 2, 4, 5, 6]" 17 | 55,"[1, 2]" 18 | 56,"[1, 4, 8, 9]" 19 | 57,"[0, 1, 4, 9]" 20 | 58,[0] 21 | 59,[0] 22 | 60,"[0, 1, 2, 8, 9]" 23 | 61,[0] 24 | 62,"[1, 2, 4, 6]" 25 | 63,[0] 26 | 64,[0] 27 | 65,[0] 28 | 66,"[1, 2, 4, 6]" 29 | 67,[0] 30 | 68,[1] 31 | 69,[0] 32 | 70,[6] 33 | 71,"[1, 2, 3, 5, 9]" 34 | 72,"[1, 2, 3, 4, 8, 9]" 35 | 73,[3] 36 | 74,[0] 37 | 75,"[1, 3]" 38 | 76,[1] 39 | 77,"[0, 1, 3, 8, 9]" 40 | 78,[1] 41 | 79,[1] 42 | 252,[0] 43 | 253,"[1, 2, 4, 6]" 44 | 254,[0] 45 | 255,"[2, 4, 7, 9]" 46 | 256,"[2, 3, 4, 7, 8]" 47 | 257,[0] 48 | 258,[0] 49 | 259,"[1, 8]" 50 | 260,"[6, 8, 9]" 51 | 261,[0] 52 | 262,[0] 53 | 263,"[2, 7]" 54 | 264,"[2, 3, 4, 6, 7, 9]" 55 | 265,"[0, 1, 2, 8, 9]" 56 | 266,[0] 57 | 
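The data_od_evaluation files above and below share one schema: ano_idx gives the index of an anomalous instance in the corresponding dataset, and exp_subspace holds its ground-truth explanation subspace, serialized as a Python-style list of feature indices. A minimal loading sketch, assuming pandas is installed and the script runs from the repository root:

import ast

import pandas as pd

gt = pd.read_csv("data_od_evaluation/SPECT_pca_gt_hbos.csv")
# exp_subspace arrives as strings such as "[1, 2, 4, 6]"; parse them into lists
gt["exp_subspace"] = gt["exp_subspace"].apply(ast.literal_eval)
print(gt.loc[0, "ano_idx"], gt.loc[0, "exp_subspace"])  # 40 [1, 2, 4, 6]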
-------------------------------------------------------------------------------- /data_od_evaluation/arrhythmia_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,[9] 3 | 4,[9] 4 | 5,"[1, 6, 9]" 5 | 10,[1] 6 | 26,[9] 7 | 34,[2] 8 | 45,[4] 9 | 46,[6] 10 | 60,[2] 11 | 61,[1] 12 | 76,"[1, 5, 7]" 13 | 83,[1] 14 | 85,[1] 15 | 87,[3] 16 | 88,[1] 17 | 89,[2] 18 | 91,[4] 19 | 93,[2] 20 | 100,[2] 21 | 105,[9] 22 | 141,[5] 23 | 168,[2] 24 | 169,[5] 25 | 174,"[1, 3]" 26 | 183,"[1, 5, 6, 8]" 27 | 185,[7] 28 | 188,[4] 29 | 189,[2] 30 | 204,[1] 31 | 207,[2] 32 | 214,[6] 33 | 217,"[1, 3]" 34 | 218,"[0, 1, 5]" 35 | 225,[7] 36 | 231,[7] 37 | 243,"[1, 3]" 38 | 248,[7] 39 | 251,[7] 40 | 252,[0] 41 | 253,"[0, 5]" 42 | 257,[9] 43 | 258,[3] 44 | 285,[5] 45 | 300,"[3, 4, 6]" 46 | 303,"[5, 8, 9]" 47 | 309,[3] 48 | 316,[2] 49 | 320,[2] 50 | 327,[0] 51 | 348,[2] 52 | 356,"[0, 3, 5, 7]" 53 | 361,"[0, 1, 5, 6]" 54 | 370,"[2, 3, 5]" 55 | 374,"[1, 4, 5, 6]" 56 | 376,"[0, 7]" 57 | 381,"[1, 5]" 58 | 387,[8] 59 | 388,[2] 60 | 395,[1] 61 | 398,"[0, 5]" 62 | 401,"[3, 4, 6]" 63 | 403,[2] 64 | 410,"[1, 5, 6]" 65 | 420,[3] 66 | 424,[2] 67 | 433,"[0, 3, 4, 5]" 68 | -------------------------------------------------------------------------------- /data_od_evaluation/SPECT_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 40,"[0, 6]" 3 | 41,"[0, 3, 7]" 4 | 42,"[0, 2, 8]" 5 | 43,"[0, 8]" 6 | 44,"[0, 1, 2, 4, 5]" 7 | 45,"[0, 1, 6]" 8 | 46,"[0, 2, 3, 4, 6, 7, 8, 9]" 9 | 47,"[0, 1, 3, 6, 8]" 10 | 48,[0] 11 | 49,"[0, 9]" 12 | 50,"[0, 3, 6, 7, 8]" 13 | 51,[0] 14 | 52,[0] 15 | 53,"[0, 6, 8]" 16 | 54,"[1, 2, 4, 5, 6]" 17 | 55,"[0, 1, 2, 3, 9]" 18 | 56,"[0, 2, 4, 5, 7, 8]" 19 | 57,"[0, 1, 2, 3, 4, 9]" 20 | 58,[0] 21 | 59,[0] 22 | 60,"[0, 1, 2, 8, 9]" 23 | 61,[0] 24 | 62,"[0, 6]" 25 | 63,[0] 26 | 64,[0] 27 | 65,[0] 28 | 66,"[0, 6]" 29 | 67,[0] 30 | 68,"[0, 1, 3]" 31 | 69,[0] 32 | 70,"[0, 6, 8]" 33 | 71,"[0, 1, 5]" 34 | 72,"[1, 2, 4, 5, 9]" 35 | 73,"[1, 3]" 36 | 74,[0] 37 | 75,"[0, 1, 3, 4]" 38 | 76,"[1, 2]" 39 | 77,"[1, 3]" 40 | 78,"[1, 4, 6]" 41 | 79,"[0, 1, 3]" 42 | 252,[0] 43 | 253,"[1, 2, 4, 6, 8]" 44 | 254,[0] 45 | 255,"[0, 2, 3, 6, 9]" 46 | 256,"[0, 2, 8]" 47 | 257,[0] 48 | 258,[0] 49 | 259,"[0, 1, 8]" 50 | 260,"[2, 3, 6, 8, 9]" 51 | 261,[0] 52 | 262,[0] 53 | 263,"[1, 2, 7, 8]" 54 | 264,"[3, 4, 9]" 55 | 265,"[2, 8, 9]" 56 | 266,[0] 57 | -------------------------------------------------------------------------------- /data_od_evaluation/SPECT_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 40,"[0, 6]" 3 | 41,"[3, 6, 7, 8]" 4 | 42,"[2, 4, 7, 8]" 5 | 43,"[3, 4, 8, 9]" 6 | 44,"[2, 4, 5]" 7 | 45,"[1, 3, 6]" 8 | 46,"[6, 9]" 9 | 47,"[6, 8]" 10 | 48,"[0, 6]" 11 | 49,"[1, 6, 9]" 12 | 50,"[3, 8, 9]" 13 | 51,"[0, 6]" 14 | 52,"[0, 6]" 15 | 53,"[1, 3, 5, 6]" 16 | 54,"[1, 3, 4, 5, 6]" 17 | 55,"[1, 2, 3, 5, 6, 8, 9]" 18 | 56,"[1, 3, 4, 6, 7, 8, 9]" 19 | 57,"[0, 1, 6, 8, 9]" 20 | 58,"[0, 6]" 21 | 59,"[0, 6]" 22 | 60,"[1, 3, 5, 8, 9]" 23 | 61,"[0, 6]" 24 | 62,"[0, 6]" 25 | 63,"[0, 6]" 26 | 64,"[0, 6]" 27 | 65,"[0, 6]" 28 | 66,"[0, 6]" 29 | 67,"[0, 6]" 30 | 68,"[1, 3, 8, 9]" 31 | 69,"[0, 6]" 32 | 70,"[3, 6]" 33 | 71,"[2, 5, 9]" 34 | 72,"[1, 2, 3, 4, 5]" 35 | 73,"[3, 6]" 36 | 74,"[0, 6]" 37 | 75,"[1, 3]" 38 | 76,"[1, 7]" 39 | 77,"[1, 3]" 40 | 78,"[1, 4, 6]" 41 | 79,"[1, 3, 8, 9]" 42 | 252,"[0, 6]" 43 | 253,"[1, 4, 5, 6, 
9]" 44 | 254,"[0, 6]" 45 | 255,"[3, 4, 5, 6, 7, 9]" 46 | 256,"[2, 4, 7, 8]" 47 | 257,"[0, 6]" 48 | 258,"[0, 6]" 49 | 259,"[1, 3, 5, 7, 8]" 50 | 260,"[6, 9]" 51 | 261,"[0, 6]" 52 | 262,"[0, 6]" 53 | 263,"[1, 2, 8]" 54 | 264,"[3, 6, 8]" 55 | 265,"[1, 6, 8, 9]" 56 | 266,"[0, 4]" 57 | -------------------------------------------------------------------------------- /data_od_evaluation/satimage-2_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5732,[2] 3 | 5733,[1] 4 | 5734,"[2, 9]" 5 | 5735,[5] 6 | 5736,[5] 7 | 5737,[1] 8 | 5738,[1] 9 | 5739,[1] 10 | 5740,[1] 11 | 5741,"[1, 2, 3, 4, 9]" 12 | 5742,[1] 13 | 5743,[1] 14 | 5744,[1] 15 | 5745,[1] 16 | 5746,[5] 17 | 5747,[1] 18 | 5748,"[1, 9]" 19 | 5749,[7] 20 | 5750,[1] 21 | 5751,[1] 22 | 5752,[1] 23 | 5753,[1] 24 | 5754,"[0, 3, 5]" 25 | 5755,[1] 26 | 5756,[1] 27 | 5757,[1] 28 | 5758,[5] 29 | 5759,"[1, 3]" 30 | 5760,[1] 31 | 5761,[1] 32 | 5762,"[1, 5, 9]" 33 | 5763,"[1, 2, 3, 4, 9]" 34 | 5764,[1] 35 | 5765,[1] 36 | 5766,"[5, 6]" 37 | 5767,[1] 38 | 5768,[1] 39 | 5769,[5] 40 | 5770,[1] 41 | 5771,[5] 42 | 5772,[1] 43 | 5773,[1] 44 | 5774,[1] 45 | 5775,"[1, 3]" 46 | 5776,[1] 47 | 5777,[5] 48 | 5778,"[7, 9]" 49 | 5779,[1] 50 | 5780,"[2, 9]" 51 | 5781,"[2, 3, 5, 7, 9]" 52 | 5782,"[3, 5]" 53 | 5783,[1] 54 | 5784,[1] 55 | 5785,[1] 56 | 5786,[5] 57 | 5787,"[2, 4]" 58 | 5788,[1] 59 | 5789,[1] 60 | 5790,[1] 61 | 5791,[7] 62 | 5792,"[1, 3]" 63 | 5793,[1] 64 | 5794,[1] 65 | 5795,[1] 66 | 5796,[1] 67 | 5797,[1] 68 | 5798,[7] 69 | 5799,"[0, 3, 5]" 70 | 5800,[1] 71 | 5801,[1] 72 | 5802,"[2, 9]" 73 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityReds-od2_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 18,[3] 3 | 38,"[0, 1]" 4 | 41,"[3, 6]" 5 | 45,"[8, 10]" 6 | 73,[6] 7 | 79,[6] 8 | 94,"[0, 1]" 9 | 151,[2] 10 | 161,[9] 11 | 167,"[5, 10]" 12 | 170,"[1, 9]" 13 | 199,"[1, 5, 9, 10]" 14 | 224,[6] 15 | 261,[6] 16 | 266,"[1, 3, 5, 7, 8]" 17 | 409,"[0, 3, 5]" 18 | 459,[10] 19 | 517,"[0, 4, 10]" 20 | 573,[0] 21 | 576,[0] 22 | 600,"[1, 5, 9, 10]" 23 | 633,"[1, 7]" 24 | 647,"[0, 1, 10]" 25 | 659,[3] 26 | 690,"[1, 3]" 27 | 703,[6] 28 | 704,"[0, 1]" 29 | 724,[1] 30 | 813,[10] 31 | 830,[3] 32 | 832,"[0, 4, 5]" 33 | 833,"[0, 5]" 34 | 872,[5] 35 | 876,[10] 36 | 899,"[1, 3, 9, 10]" 37 | 927,[6] 38 | 937,[0] 39 | 1124,[0] 40 | 1176,"[0, 1, 3, 5, 10]" 41 | 1189,"[1, 9]" 42 | 1233,"[0, 1, 5, 6, 9]" 43 | 1235,[3] 44 | 1238,"[1, 3, 5, 9]" 45 | 1239,[3] 46 | 1261,"[0, 1, 10]" 47 | 1263,"[1, 5, 9]" 48 | 1276,[3] 49 | 1293,"[1, 5, 9]" 50 | 1299,[1] 51 | 1307,[3] 52 | 1363,[6] 53 | 1369,"[0, 7, 9]" 54 | 1374,[4] 55 | 1423,[3] 56 | 1461,"[0, 1, 8, 10]" 57 | 1467,"[0, 1, 10]" 58 | 1469,"[1, 6]" 59 | 1478,[3] 60 | 1480,[0] 61 | 1482,"[2, 10]" 62 | 1484,"[0, 1, 10]" 63 | 1505,"[0, 1, 8, 10]" 64 | 1521,[10] 65 | -------------------------------------------------------------------------------- /data_od_evaluation/arrhythmia_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[0, 9]" 3 | 4,[9] 4 | 5,"[1, 7, 9]" 5 | 10,[1] 6 | 26,"[0, 9]" 7 | 34,"[5, 8]" 8 | 45,[4] 9 | 46,[8] 10 | 60,[2] 11 | 61,[1] 12 | 76,[9] 13 | 83,"[1, 2]" 14 | 85,"[1, 5]" 15 | 87,"[3, 8]" 16 | 88,"[1, 3, 6]" 17 | 89,"[2, 6, 8]" 18 | 91,[4] 19 | 93,"[4, 9]" 20 | 100,"[1, 7, 8, 9]" 21 | 105,"[8, 9]" 22 | 141,[5] 23 | 168,"[2, 6]" 24 | 
169,"[5, 8]" 25 | 174,"[1, 3]" 26 | 183,"[2, 5, 8]" 27 | 185,"[1, 6, 7]" 28 | 188,"[2, 3]" 29 | 189,"[0, 2, 3, 4, 5]" 30 | 204,"[1, 5, 9]" 31 | 207,[2] 32 | 214,"[2, 6]" 33 | 217,"[3, 7]" 34 | 218,"[0, 1, 5, 9]" 35 | 225,"[1, 7]" 36 | 231,"[1, 4, 5, 7]" 37 | 243,"[1, 3]" 38 | 248,"[2, 9]" 39 | 251,"[1, 7, 8, 9]" 40 | 252,[0] 41 | 253,"[0, 5]" 42 | 257,[8] 43 | 258,[3] 44 | 285,"[0, 4, 5]" 45 | 300,"[3, 8]" 46 | 303,"[0, 5, 8, 9]" 47 | 309,[3] 48 | 316,"[2, 3, 5]" 49 | 320,[2] 50 | 327,"[0, 8]" 51 | 348,[2] 52 | 356,"[0, 5]" 53 | 361,"[0, 1, 5, 9]" 54 | 370,"[1, 2, 3, 5]" 55 | 374,"[0, 1, 6]" 56 | 376,"[0, 4]" 57 | 381,"[1, 5]" 58 | 387,"[3, 8]" 59 | 388,[2] 60 | 395,"[1, 2, 3, 4, 6]" 61 | 398,"[0, 5]" 62 | 401,"[6, 8]" 63 | 403,"[0, 2, 9]" 64 | 410,"[1, 6, 7]" 65 | 420,"[2, 3]" 66 | 424,"[2, 4, 6, 8]" 67 | 433,"[0, 4]" 68 | -------------------------------------------------------------------------------- /data_od_evaluation/arrhythmia_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[6, 9]" 3 | 4,[9] 4 | 5,"[1, 7]" 5 | 10,"[1, 7]" 6 | 26,"[3, 4, 5, 9]" 7 | 34,"[1, 3, 5, 6]" 8 | 45,"[3, 4, 9]" 9 | 46,"[6, 9]" 10 | 60,"[2, 3]" 11 | 61,"[3, 5]" 12 | 76,"[1, 5, 8, 9]" 13 | 83,"[0, 1]" 14 | 85,[1] 15 | 87,"[3, 4, 6]" 16 | 88,"[1, 6]" 17 | 89,"[2, 8]" 18 | 91,"[4, 5, 9]" 19 | 93,"[0, 1, 3, 9]" 20 | 100,"[3, 7, 9]" 21 | 105,"[8, 9]" 22 | 141,[5] 23 | 168,"[2, 3, 9]" 24 | 169,[6] 25 | 174,"[1, 3, 9]" 26 | 183,"[1, 3, 6, 9]" 27 | 185,[6] 28 | 188,"[2, 3, 4]" 29 | 189,"[0, 7]" 30 | 204,"[1, 5, 8]" 31 | 207,"[1, 2, 6, 9]" 32 | 214,[6] 33 | 217,[3] 34 | 218,"[0, 1, 5, 9]" 35 | 225,"[0, 1, 5, 7]" 36 | 231,[6] 37 | 243,"[3, 8]" 38 | 248,"[0, 3, 6, 9]" 39 | 251,"[6, 9]" 40 | 252,"[0, 5, 9]" 41 | 253,"[0, 5]" 42 | 257,"[1, 5, 8, 9]" 43 | 258,"[3, 7, 9]" 44 | 285,"[0, 5]" 45 | 300,"[3, 4, 6]" 46 | 303,"[0, 5, 9]" 47 | 309,"[1, 3, 7]" 48 | 316,"[3, 5, 6]" 49 | 320,"[2, 3, 5]" 50 | 327,"[0, 9]" 51 | 348,"[1, 5, 8]" 52 | 356,"[0, 5]" 53 | 361,"[0, 1, 5, 7]" 54 | 370,"[2, 3]" 55 | 374,"[2, 5]" 56 | 376,"[0, 7]" 57 | 381,"[1, 5, 8]" 58 | 387,"[3, 6, 8]" 59 | 388,[2] 60 | 395,"[0, 1]" 61 | 398,"[0, 5]" 62 | 401,"[3, 4, 6]" 63 | 403,"[0, 9]" 64 | 410,[6] 65 | 420,"[3, 4]" 66 | 424,"[1, 7]" 67 | 433,"[0, 3, 5]" 68 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityReds-od2_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 18,"[3, 10]" 3 | 38,"[0, 1, 4, 6]" 4 | 41,[9] 5 | 45,"[1, 8, 10]" 6 | 73,"[1, 10]" 7 | 79,"[5, 6, 9]" 8 | 94,"[0, 1, 8]" 9 | 151,[2] 10 | 161,"[1, 2, 6, 9, 10]" 11 | 167,"[2, 10]" 12 | 170,[9] 13 | 199,"[1, 7, 9]" 14 | 224,"[1, 2, 10]" 15 | 261,"[1, 2, 10]" 16 | 266,"[2, 7]" 17 | 409,"[0, 5]" 18 | 459,"[1, 2, 10]" 19 | 517,"[4, 7, 10]" 20 | 573,"[0, 1]" 21 | 576,"[0, 2, 6]" 22 | 600,"[1, 2]" 23 | 633,"[1, 7, 10]" 24 | 647,"[1, 5, 10]" 25 | 659,"[1, 3]" 26 | 690,"[1, 8]" 27 | 703,"[2, 6]" 28 | 704,"[0, 1, 2, 5]" 29 | 724,[1] 30 | 813,[6] 31 | 830,[6] 32 | 832,"[4, 5]" 33 | 833,"[0, 4, 5]" 34 | 872,"[1, 5, 9, 10]" 35 | 876,"[1, 2]" 36 | 899,"[1, 7]" 37 | 927,"[5, 6, 8]" 38 | 937,"[0, 3]" 39 | 1124,"[5, 8]" 40 | 1176,"[1, 3, 8]" 41 | 1189,"[0, 1, 2]" 42 | 1233,"[1, 10]" 43 | 1235,"[0, 3, 8]" 44 | 1238,"[0, 1, 2, 7, 9]" 45 | 1239,"[2, 3, 8, 10]" 46 | 1261,"[1, 2, 7, 10]" 47 | 1263,"[1, 2, 9]" 48 | 1276,"[3, 6]" 49 | 1293,"[1, 2, 5, 10]" 50 | 1299,[1] 51 | 1307,"[2, 3, 10]" 52 | 
1363,"[1, 2]" 53 | 1369,[9] 54 | 1374,"[1, 4, 7]" 55 | 1423,"[3, 4, 8]" 56 | 1461,"[0, 1, 6, 8]" 57 | 1467,"[1, 2]" 58 | 1469,"[1, 5]" 59 | 1478,"[1, 3, 8]" 60 | 1480,"[8, 10]" 61 | 1482,"[0, 2, 5, 10]" 62 | 1484,"[1, 5, 10]" 63 | 1505,"[0, 1, 6, 8]" 64 | 1521,"[9, 10]" 65 | -------------------------------------------------------------------------------- /data_od_evaluation/speech_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[0, 1]" 3 | 1,"[1, 7]" 4 | 2,"[2, 3, 4]" 5 | 3,"[0, 2, 3, 5, 8]" 6 | 4,"[0, 2, 3]" 7 | 5,"[0, 1, 6, 8]" 8 | 6,"[0, 1]" 9 | 7,"[1, 6]" 10 | 8,"[1, 3, 7]" 11 | 9,"[1, 8, 9]" 12 | 10,"[1, 2, 6]" 13 | 11,"[1, 6, 8]" 14 | 12,"[1, 9]" 15 | 13,"[1, 9]" 16 | 14,"[2, 3, 7]" 17 | 15,"[1, 6]" 18 | 16,"[1, 6]" 19 | 17,"[3, 6]" 20 | 18,"[1, 8, 9]" 21 | 19,"[1, 2, 3, 5, 7, 8]" 22 | 20,"[1, 2]" 23 | 21,[1] 24 | 22,"[1, 8, 9]" 25 | 23,"[1, 8, 9]" 26 | 24,"[6, 9]" 27 | 25,"[7, 9]" 28 | 26,"[1, 4, 5]" 29 | 27,"[0, 1, 6]" 30 | 28,"[0, 1, 2, 3, 6, 7, 9]" 31 | 29,"[0, 1, 4, 6, 7, 9]" 32 | 30,"[0, 1, 3, 4, 5, 9]" 33 | 31,"[0, 2, 3, 4]" 34 | 32,"[1, 4]" 35 | 33,"[1, 9]" 36 | 34,"[0, 1, 4, 6, 8]" 37 | 35,"[0, 2, 3, 4, 5]" 38 | 36,"[0, 2]" 39 | 37,"[0, 2, 3]" 40 | 38,"[0, 1, 2, 3, 5, 6, 8]" 41 | 39,"[4, 7]" 42 | 40,"[1, 8, 9]" 43 | 41,"[2, 4, 5, 7]" 44 | 42,[6] 45 | 43,"[1, 2, 3, 6, 7, 8]" 46 | 44,"[4, 5, 6, 7]" 47 | 45,"[0, 2, 3]" 48 | 46,"[2, 3, 4, 6]" 49 | 47,"[1, 3, 4, 5, 7, 8, 9]" 50 | 48,"[1, 2, 3, 4, 7, 8]" 51 | 49,"[2, 3, 5, 6, 9]" 52 | 50,"[0, 2, 5, 8, 9]" 53 | 51,"[0, 2]" 54 | 52,"[0, 2, 3, 5]" 55 | 53,[2] 56 | 54,"[0, 3, 6, 8]" 57 | 55,"[0, 2, 3, 7]" 58 | 56,"[0, 1, 2, 3, 8]" 59 | 57,"[0, 1, 6, 7, 8]" 60 | 58,"[5, 7, 8]" 61 | 59,"[1, 2, 6, 7, 8]" 62 | 60,"[1, 2, 4, 6, 9]" 63 | -------------------------------------------------------------------------------- /data_od_evaluation/speech_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[1, 4]" 3 | 1,"[0, 1, 2, 7, 9]" 4 | 2,"[0, 1, 3, 4]" 5 | 3,"[0, 1, 3, 6, 8]" 6 | 4,"[0, 2, 3, 5]" 7 | 5,"[0, 1, 6]" 8 | 6,"[0, 1, 4, 7]" 9 | 7,"[1, 6]" 10 | 8,"[1, 7, 8]" 11 | 9,"[1, 7, 8]" 12 | 10,"[1, 3, 6]" 13 | 11,"[1, 6, 8]" 14 | 12,"[1, 7]" 15 | 13,"[1, 4, 9]" 16 | 14,"[1, 2, 3, 7]" 17 | 15,"[0, 1, 4, 5]" 18 | 16,"[0, 1, 7]" 19 | 17,"[1, 3, 5]" 20 | 18,"[1, 4]" 21 | 19,"[1, 2, 3, 5, 7, 8, 9]" 22 | 20,"[1, 2, 3, 5, 8, 9]" 23 | 21,"[1, 7, 8]" 24 | 22,"[1, 4]" 25 | 23,"[1, 4, 6, 9]" 26 | 24,"[1, 6, 7]" 27 | 25,"[0, 1, 7]" 28 | 26,"[0, 1, 4, 6]" 29 | 27,"[0, 1, 6, 7]" 30 | 28,"[0, 1, 2, 7, 9]" 31 | 29,"[1, 7, 8]" 32 | 30,"[0, 1, 2, 3, 4, 7, 9]" 33 | 31,"[0, 1, 2, 4, 7, 9]" 34 | 32,"[1, 7, 8]" 35 | 33,"[1, 6]" 36 | 34,"[4, 5, 6, 8]" 37 | 35,"[0, 2, 3, 5]" 38 | 36,"[0, 2, 3, 5]" 39 | 37,"[0, 2, 3, 5]" 40 | 38,"[2, 3, 5, 6]" 41 | 39,"[1, 7]" 42 | 40,"[1, 7, 8]" 43 | 41,"[2, 5, 6, 7]" 44 | 42,"[1, 6]" 45 | 43,"[1, 2, 5, 6, 8]" 46 | 44,"[2, 5, 6, 7]" 47 | 45,"[0, 2, 3, 7]" 48 | 46,"[3, 4, 6, 8]" 49 | 47,"[1, 3, 8]" 50 | 48,"[0, 1, 2, 7, 8]" 51 | 49,"[2, 6, 8, 9]" 52 | 50,"[0, 2, 4, 9]" 53 | 51,"[0, 2, 9]" 54 | 52,"[0, 2, 3, 5]" 55 | 53,"[0, 1, 2, 4]" 56 | 54,"[0, 3, 6]" 57 | 55,"[2, 3, 7]" 58 | 56,"[0, 1, 2, 3]" 59 | 57,"[0, 4, 7]" 60 | 58,"[1, 2, 3, 6, 8]" 61 | 59,"[0, 1, 2, 6, 7, 8, 9]" 62 | 60,"[1, 4, 6]" 63 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityReds-od2_gt_copod.csv: 
-------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 18,"[1, 3, 4, 10]" 3 | 38,"[0, 1, 4]" 4 | 41,"[1, 3, 9]" 5 | 45,"[0, 8]" 6 | 73,"[1, 7, 10]" 7 | 79,"[6, 9, 10]" 8 | 94,"[0, 1, 5]" 9 | 151,[2] 10 | 161,"[1, 3, 9]" 11 | 167,"[5, 9, 10]" 12 | 170,"[1, 6, 9, 10]" 13 | 199,"[1, 8, 9, 10]" 14 | 224,"[0, 1, 4, 6]" 15 | 261,"[0, 1]" 16 | 266,"[1, 7, 8]" 17 | 409,"[0, 3, 5, 8]" 18 | 459,"[7, 10]" 19 | 517,"[1, 4, 7, 10]" 20 | 573,"[0, 6, 7]" 21 | 576,"[0, 4, 6]" 22 | 600,"[0, 1, 9]" 23 | 633,"[1, 7, 10]" 24 | 647,"[1, 5, 10]" 25 | 659,"[1, 3, 4]" 26 | 690,"[1, 3, 8]" 27 | 703,"[1, 2, 5, 6]" 28 | 704,"[0, 1, 5]" 29 | 724,"[1, 3]" 30 | 813,"[4, 6, 8, 10]" 31 | 830,"[1, 4, 6, 10]" 32 | 832,"[0, 4, 5]" 33 | 833,"[0, 4, 5]" 34 | 872,"[5, 8]" 35 | 876,"[4, 6, 8, 10]" 36 | 899,"[1, 7, 10]" 37 | 927,"[1, 4, 6, 10]" 38 | 937,"[0, 1]" 39 | 1124,"[4, 5, 8, 10]" 40 | 1176,"[1, 3, 8]" 41 | 1189,"[0, 1, 9]" 42 | 1233,"[0, 1, 9]" 43 | 1235,"[0, 1, 3, 4, 6]" 44 | 1238,"[1, 7, 9]" 45 | 1239,"[1, 3, 8, 10]" 46 | 1261,"[1, 8]" 47 | 1263,"[1, 7, 9]" 48 | 1276,"[3, 4, 10]" 49 | 1293,"[1, 2]" 50 | 1299,[1] 51 | 1307,"[1, 3]" 52 | 1363,"[1, 6, 9]" 53 | 1369,"[0, 6, 9]" 54 | 1374,"[1, 3, 4]" 55 | 1423,"[3, 4, 8]" 56 | 1461,"[1, 2, 8]" 57 | 1467,"[1, 5, 10]" 58 | 1469,"[1, 5]" 59 | 1478,"[1, 3]" 60 | 1480,[8] 61 | 1482,"[4, 6]" 62 | 1484,"[1, 8]" 63 | 1505,"[1, 8]" 64 | 1521,"[9, 10]" 65 | -------------------------------------------------------------------------------- /eval/eva_main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from eval import evaluation_od 5 | from config import root 6 | 7 | 8 | def main(path): 9 | data_name = path.split("/")[-1].split(".")[0] 10 | df = pd.read_csv(path) 11 | X = df.values[:, :-1] 12 | y = np.array(df.values[:, -1], dtype=int) 13 | print("Data name: [%s]" % data_name) 14 | 15 | model_name1 = "hbos" 16 | path1 = "data_od_evaluation/" + data_name + "_gt_" + model_name1 + ".csv" 17 | path2 = "data_od_evaluation/" + data_name + "_score_" + model_name1 + ".csv" 18 | if not (os.path.exists(path1) and os.path.exists(path2)): 19 | print("OD evaluation model training is processing...") 20 | evaluation_od.evaluation_od_train(X, y, data_name, model_name1) 21 | 22 | return 23 | 24 | 25 | if __name__ == '__main__': 26 | input_root_list = [root + "data/"] 27 | runs = 1 28 | 29 | for input_root in input_root_list: 30 | if os.path.isdir(input_root): 31 | for file_name in sorted(os.listdir(input_root)): 32 | if file_name.endswith(".csv"): 33 | input_path = str(os.path.join(input_root, file_name)) 34 | name = input_path.split("/")[-1].split('.')[0] 35 | main(input_path) 36 | 37 | else: 38 | input_path = input_root 39 | name = input_path.split("/")[-1].split(".")[0] 40 | main(input_path) -------------------------------------------------------------------------------- /data_od_evaluation/satimage-2_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5732,"[1, 2, 4, 8]" 3 | 5733,[1] 4 | 5734,"[1, 5, 7]" 5 | 5735,[5] 6 | 5736,[5] 7 | 5737,"[0, 1, 2, 4]" 8 | 5738,[1] 9 | 5739,"[0, 1, 6]" 10 | 5740,"[0, 1, 2, 8, 9]" 11 | 5741,"[0, 1, 7]" 12 | 5742,"[2, 3]" 13 | 5743,[1] 14 | 5744,[1] 15 | 5745,"[0, 1]" 16 | 5746,"[2, 4, 5, 6, 7, 8]" 17 | 5747,"[0, 2]" 18 | 5748,"[2, 4, 9]" 19 | 5749,"[0, 3, 4, 7]" 20 | 5750,"[1, 2, 4, 8]" 21 | 5751,"[0, 1, 6]" 22 | 5752,[2] 23 | 
5753,"[1, 9]" 24 | 5754,"[0, 1, 3]" 25 | 5755,"[1, 2, 8]" 26 | 5756,"[0, 1, 5]" 27 | 5757,"[0, 1, 2, 8]" 28 | 5758,"[5, 7]" 29 | 5759,"[0, 1, 3, 5]" 30 | 5760,"[0, 2, 3, 4, 5]" 31 | 5761,[1] 32 | 5762,"[1, 7]" 33 | 5763,"[0, 1, 7]" 34 | 5764,"[1, 7]" 35 | 5765,"[0, 1, 2, 3]" 36 | 5766,"[4, 5, 6]" 37 | 5767,"[0, 1, 2, 3]" 38 | 5768,"[1, 2, 4, 7]" 39 | 5769,"[5, 7]" 40 | 5770,"[0, 2]" 41 | 5771,"[1, 5, 6]" 42 | 5772,"[0, 1, 6]" 43 | 5773,"[1, 3]" 44 | 5774,"[1, 2, 6]" 45 | 5775,"[0, 1, 3, 4, 5]" 46 | 5776,"[0, 2]" 47 | 5777,"[5, 9]" 48 | 5778,"[5, 7, 9]" 49 | 5779,"[0, 2]" 50 | 5780,"[0, 1, 9]" 51 | 5781,"[2, 4, 5]" 52 | 5782,"[0, 1, 3, 4]" 53 | 5783,[1] 54 | 5784,[1] 55 | 5785,"[0, 2]" 56 | 5786,"[0, 1, 5]" 57 | 5787,"[4, 5]" 58 | 5788,[7] 59 | 5789,"[0, 2]" 60 | 5790,"[1, 2, 6]" 61 | 5791,"[1, 3, 4, 5, 8]" 62 | 5792,"[1, 3]" 63 | 5793,"[0, 1, 6]" 64 | 5794,"[0, 2]" 65 | 5795,"[0, 1, 6]" 66 | 5796,"[0, 1, 2, 8]" 67 | 5797,"[1, 2, 8, 9]" 68 | 5798,"[3, 5, 6, 7, 8]" 69 | 5799,"[1, 3, 5]" 70 | 5800,[1] 71 | 5801,"[0, 2]" 72 | 5802,"[1, 5]" 73 | -------------------------------------------------------------------------------- /data_od_evaluation/satimage-2_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5732,"[2, 3]" 3 | 5733,"[0, 1, 3, 9]" 4 | 5734,"[2, 3, 5, 8, 9]" 5 | 5735,[5] 6 | 5736,[5] 7 | 5737,"[1, 2, 5, 6, 9]" 8 | 5738,"[1, 3]" 9 | 5739,"[0, 1, 3]" 10 | 5740,"[0, 1, 3, 8, 9]" 11 | 5741,"[1, 2, 3, 7, 8, 9]" 12 | 5742,"[1, 3, 6]" 13 | 5743,"[2, 3]" 14 | 5744,"[1, 3]" 15 | 5745,"[1, 2, 5, 6, 9]" 16 | 5746,"[1, 5, 6]" 17 | 5747,"[0, 2, 5]" 18 | 5748,"[0, 1, 2, 4, 7, 9]" 19 | 5749,"[3, 4, 5, 7, 8, 9]" 20 | 5750,"[1, 3, 4, 5]" 21 | 5751,"[0, 1, 3, 9]" 22 | 5752,[2] 23 | 5753,"[0, 1, 3, 8, 9]" 24 | 5754,"[1, 3, 6]" 25 | 5755,"[1, 3, 9]" 26 | 5756,"[0, 2, 5]" 27 | 5757,"[2, 3, 8]" 28 | 5758,"[1, 4, 5]" 29 | 5759,"[0, 1, 3]" 30 | 5760,"[0, 1, 3, 9]" 31 | 5761,"[1, 3, 9]" 32 | 5762,"[1, 6, 7]" 33 | 5763,"[1, 2, 3, 7, 8, 9]" 34 | 5764,"[1, 3, 9]" 35 | 5765,"[1, 3, 9]" 36 | 5766,"[2, 4, 5, 6, 8]" 37 | 5767,"[0, 1, 2, 3, 6, 8, 9]" 38 | 5768,"[0, 1, 2, 6, 7]" 39 | 5769,"[0, 1, 3, 5]" 40 | 5770,"[0, 2, 3, 5, 6]" 41 | 5771,"[2, 5, 6]" 42 | 5772,"[0, 1, 2, 7]" 43 | 5773,"[0, 1, 3]" 44 | 5774,"[0, 2, 3, 5]" 45 | 5775,"[1, 3, 9]" 46 | 5776,"[1, 2, 3, 4, 6, 7, 8]" 47 | 5777,"[7, 9]" 48 | 5778,"[0, 3, 5, 6, 7, 9]" 49 | 5779,"[0, 1, 3]" 50 | 5780,"[1, 3, 9]" 51 | 5781,"[1, 3, 4, 5, 9]" 52 | 5782,"[3, 7]" 53 | 5783,"[0, 1, 3, 9]" 54 | 5784,[1] 55 | 5785,"[2, 3, 5, 6]" 56 | 5786,"[1, 5, 9]" 57 | 5787,"[2, 4, 7]" 58 | 5788,[7] 59 | 5789,"[0, 1, 3, 5]" 60 | 5790,"[1, 3, 9]" 61 | 5791,"[1, 8]" 62 | 5792,"[0, 1, 3]" 63 | 5793,"[0, 1, 2, 3, 7]" 64 | 5794,"[1, 4, 5]" 65 | 5795,"[0, 1, 2, 5, 9]" 66 | 5796,"[2, 3, 8, 9]" 67 | 5797,"[0, 1, 2, 3, 5, 6, 9]" 68 | 5798,"[3, 4, 5, 6, 7, 8]" 69 | 5799,"[1, 3, 7]" 70 | 5800,"[1, 3, 8]" 71 | 5801,"[0, 2, 3, 5]" 72 | 5802,"[1, 2, 3, 7, 9]" 73 | -------------------------------------------------------------------------------- /model_coin/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from sklearn.neighbors import LocalOutlierFactor 5 | from sklearn.ensemble import IsolationForest 6 | 7 | 8 | def detect_lof(args, X): 9 | num_inst = X.shape[0] 10 | num_nbr = int(num_inst * args.ratio_nbr) 11 | clf = LocalOutlierFactor(n_neighbors=num_nbr) 12 | y_pred = clf.fit_predict(X) 13 | outlier_scores = 
-clf.negative_outlier_factor_ 14 | 15 | return y_pred 16 | 17 | 18 | def detect_isoforest(args, X): 19 | num_inst = X.shape[0] 20 | clf = IsolationForest(max_samples=num_inst, random_state=0)  # the deprecated 'behaviour' kwarg was removed in sklearn 0.24 21 | clf.fit(X) 22 | y_pred = clf.predict(X) 23 | outlier_scores = -clf.decision_function(X) 24 | 25 | return y_pred 26 | 27 | 28 | def get_dataset_basic_info(path): 29 | data_name = path.split("/")[-1].split(".")[0] 30 | df = pd.read_csv(path) 31 | x = df.values[:, :-1] 32 | y = np.array(df.values[:, -1], dtype=int) 33 | n = x.shape[0] 34 | dim = x.shape[1] 35 | n_ano = len(np.where(y == 1)[0]) 36 | ratio_ano = n_ano / n 37 | 38 | print("%s, %d, %d, %d, %.4f " % (data_name, n, dim, n_ano, ratio_ano)) 39 | 40 | return 41 | 42 | 43 | if __name__ == '__main__': 44 | input_root_list = ["E:/OneDrive/work/0data/odds/integer/"] 45 | 46 | seed = -1 47 | 48 | for input_root in input_root_list: 49 | if os.path.isdir(input_root): 50 | for file_name in sorted(os.listdir(input_root)): 51 | if file_name.endswith(".csv"): 52 | input_path = str(os.path.join(input_root, file_name)) 53 | name = input_path.split("/")[-1].split('.')[0] 54 | get_dataset_basic_info(input_path) 55 | 56 | else: 57 | input_path = input_root 58 | name = input_path.split("/")[-1].split(".")[0] 59 | get_dataset_basic_info(input_path) -------------------------------------------------------------------------------- /model_iml/LIME.py: -------------------------------------------------------------------------------- 1 | import lime 2 | import lime.lime_tabular 3 | import numpy as np 4 | import sklearn 5 | import sklearn.metrics 6 | from tqdm import tqdm 7 | import sklearn.svm 8 | 9 | 10 | class LIME: 11 | def __init__(self, discretize_continuous=True, discretizer='quartile'): 12 | """ 13 | 14 | :param discretize_continuous: if True, all non-categorical features will be discretized into quartiles. 15 | :param discretizer: only matters if discretize_continuous is True and data is not sparse. 16 | Options are 'quartile', 'decile', 'entropy' or a BaseDiscretizer instance. 
17 | """ 18 | self.discretize_continuous = discretize_continuous 19 | self.discretizer = discretizer 20 | 21 | self.dim = None 22 | self.ano_idx = None 23 | return 24 | 25 | def fit(self, x, y, ano_class=1): 26 | self.ano_idx = np.where(y == 1)[0] 27 | ano_idx = self.ano_idx 28 | self.dim = x.shape[1] 29 | svm = sklearn.svm.SVC(kernel="rbf", probability=True) 30 | svm.fit(x, y) 31 | 32 | y_pred = svm.predict(x) 33 | print("Clf model accuracy: [{:.4f}]".format(sklearn.metrics.accuracy_score(y, y_pred))) 34 | 35 | explainer = lime.lime_tabular.LimeTabularExplainer(x, discretize_continuous=self.discretize_continuous, 36 | discretizer=self.discretizer) 37 | ano_f_weights = np.zeros([len(ano_idx), self.dim]) 38 | 39 | print(len(ano_idx)) 40 | 41 | for ii in tqdm(range(len(ano_idx))): 42 | idx = ano_idx[ii] 43 | exp = explainer.explain_instance(x[idx], svm.predict_proba, labels=(ano_class,), num_features=self.dim) 44 | tuples = exp.as_map()[1] 45 | for tuple in tuples: 46 | f_id, weight = tuple 47 | ano_f_weights[ii][f_id] = weight 48 | return ano_f_weights 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /data_od_evaluation/letter_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1500,[2] 3 | 1501,"[4, 8]" 4 | 1502,"[0, 5]" 5 | 1503,"[0, 4]" 6 | 1504,"[0, 1]" 7 | 1505,"[0, 1, 4, 5, 9]" 8 | 1506,"[1, 4, 6, 7, 8, 9]" 9 | 1507,[9] 10 | 1508,"[1, 2, 9]" 11 | 1509,"[1, 8, 9]" 12 | 1510,"[3, 4, 6, 7, 8]" 13 | 1511,[4] 14 | 1512,[0] 15 | 1513,"[0, 2, 3, 5, 6]" 16 | 1514,"[0, 4, 7]" 17 | 1515,"[0, 3, 5, 6]" 18 | 1516,[4] 19 | 1517,"[0, 4]" 20 | 1518,[4] 21 | 1519,[8] 22 | 1520,"[6, 7, 8]" 23 | 1521,"[0, 1, 2, 4, 6, 9]" 24 | 1522,"[0, 9]" 25 | 1523,[2] 26 | 1524,"[0, 1, 2, 5, 6, 8]" 27 | 1525,"[3, 5, 6, 7]" 28 | 1526,[0] 29 | 1527,"[1, 2, 7]" 30 | 1528,"[1, 2, 4, 9]" 31 | 1529,"[2, 4, 6, 7, 8]" 32 | 1530,"[2, 4, 8, 9]" 33 | 1531,[5] 34 | 1532,"[0, 1, 6, 9]" 35 | 1533,[9] 36 | 1534,"[3, 6, 7]" 37 | 1535,"[2, 9]" 38 | 1536,[7] 39 | 1537,[6] 40 | 1538,[0] 41 | 1539,[0] 42 | 1540,"[0, 1, 7]" 43 | 1541,"[0, 1, 4, 7, 9]" 44 | 1542,"[1, 2, 5, 8]" 45 | 1543,"[2, 4, 5]" 46 | 1544,[6] 47 | 1545,[6] 48 | 1546,[5] 49 | 1547,[0] 50 | 1548,[2] 51 | 1549,"[2, 9]" 52 | 1550,[5] 53 | 1551,[4] 54 | 1552,[9] 55 | 1553,"[0, 1]" 56 | 1554,[0] 57 | 1555,"[0, 3, 5]" 58 | 1556,"[3, 6, 9]" 59 | 1557,"[1, 2, 3, 5, 7, 8]" 60 | 1558,"[2, 3, 4, 5, 6]" 61 | 1559,[0] 62 | 1560,"[0, 1, 5]" 63 | 1561,[4] 64 | 1562,"[2, 4, 5, 6]" 65 | 1563,"[4, 9]" 66 | 1564,[0] 67 | 1565,[9] 68 | 1566,[9] 69 | 1567,[7] 70 | 1568,[2] 71 | 1569,"[0, 1]" 72 | 1570,[5] 73 | 1571,[7] 74 | 1572,"[0, 9]" 75 | 1573,[0] 76 | 1574,[0] 77 | 1575,"[1, 4]" 78 | 1576,[4] 79 | 1577,"[1, 6, 7, 9]" 80 | 1578,"[1, 4, 8, 9]" 81 | 1579,[4] 82 | 1580,[0] 83 | 1581,[9] 84 | 1582,[9] 85 | 1583,"[2, 3, 4, 9]" 86 | 1584,"[1, 2, 7]" 87 | 1585,[5] 88 | 1586,[0] 89 | 1587,[7] 90 | 1588,"[4, 6, 8, 9]" 91 | 1589,"[1, 2, 4, 5, 7, 9]" 92 | 1590,[4] 93 | 1591,[8] 94 | 1592,[9] 95 | 1593,[5] 96 | 1594,[7] 97 | 1595,"[1, 2, 5]" 98 | 1596,[9] 99 | 1597,"[0, 4, 5, 7, 9]" 100 | 1598,"[2, 9]" 101 | 1599,"[0, 1, 2, 3, 6]" 102 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def weight2subspace(weight, ratio=0.7, num=-1): 6 | """ 7 | this function is to transfer feature weight 
list into a subspace of the highest-weight features, 8 | where the given ratio sets the fraction of the total weight that the selected subspace must cover 9 | :param weight: per-feature weight list 10 | :param ratio: fraction of the total weight the subspace should cover 11 | :param num: if not -1, return exactly the num top-weighted features instead 12 | :return: sorted list of the selected feature indices 13 | """ 14 | dim = len(weight) 15 | 16 | threshold = ratio * np.sum(weight) 17 | 18 | sorted_idx = np.argsort(weight) 19 | sorted_idx = [sorted_idx[dim - i - 1] for i in range(dim)] 20 | 21 | if num != -1: 22 | exp_subspace = sorted_idx[:num] 23 | exp_subspace = list(np.sort(exp_subspace)) 24 | return exp_subspace 25 | 26 | tmp_s = 0 27 | exp_subspace = [] 28 | for idx in sorted_idx: 29 | tmp_s += weight[idx] 30 | exp_subspace.append(idx) 31 | if tmp_s >= threshold: 32 | break 33 | exp_subspace = list(np.sort(exp_subspace)) 34 | return exp_subspace 35 | 36 | 37 | def weight2subspace_pn(weight): 38 | exp_subspace = [] 39 | for i in range(len(weight)): 40 | if weight[i] > 0: 41 | exp_subspace.append(i) 42 | if len(exp_subspace) == 0: 43 | exp_subspace.append(np.argsort(weight)[len(weight) - 1]) 44 | exp_subspace = list(np.sort(exp_subspace)) 45 | return exp_subspace 46 | 47 | 48 | def get_exp_subspace(fea_weight_lst, w2s_ratio, real_exp_len=None): 49 | exp_subspace_lst = [] 50 | n_ano = len(fea_weight_lst) 51 | dim = len(fea_weight_lst[0]) 52 | 53 | for ii in range(n_ano): 54 | fea_weight = fea_weight_lst[ii] 55 | if w2s_ratio == "real_len": 56 | if real_exp_len is None: 57 | raise ValueError("real_exp_len must be given when w2s_ratio is 'real_len'") 58 | exp_subspace_lst.append(weight2subspace(fea_weight, num=real_exp_len[ii])) 59 | 60 | elif w2s_ratio == "auto": 61 | r = math.sqrt(2 / dim) 62 | exp_subspace_lst.append(weight2subspace(fea_weight, ratio=r)) 63 | 64 | elif w2s_ratio == "pn": 65 | exp_subspace_lst.append(weight2subspace_pn(fea_weight)) 66 | 67 | else: 68 | exp_subspace_lst.append(weight2subspace(fea_weight, ratio=w2s_ratio)) 69 | return exp_subspace_lst 70 | 71 | 72 | -------------------------------------------------------------------------------- /model_iml/Anchor.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | from sklearn.ensemble import RandomForestClassifier 3 | import numpy as np 4 | from alibi.explainers import AnchorTabular 5 | from tqdm import tqdm 6 | 7 | 8 | class Anchor: 9 | def __init__(self, kernel="rbf"): 10 | """ 11 | 12 | :param kernel: kernel for the commented-out svm classifier; fit() uses a RandomForest instead. 13 | note: fit() asks AnchorTabular for anchors at a fixed precision threshold of 0.95, 14 | falling back to the full feature set for anomalies whose anchor comes back empty. 
15 | """ 16 | self.ano_idx = None 17 | 18 | self.kernel = kernel 19 | 20 | self.dim = None 21 | return 22 | 23 | def fit(self, x, y): 24 | 25 | self.dim = x.shape[1] 26 | 27 | # clf = sklearn.svm.SVC(kernel=self.kernel, probability=True) 28 | clf = RandomForestClassifier() 29 | clf.fit(x, y) 30 | 31 | y_pred = clf.predict(x) 32 | print("Clf model accuracy: [{:.4f}]".format(sklearn.metrics.accuracy_score(y, y_pred))) 33 | 34 | self.ano_idx = np.where(y == 1)[0] 35 | print(self.ano_idx.shape) 36 | 37 | n_f = x.shape[1] 38 | feature_names = ["A"+str(i) for i in range(n_f)] 39 | # use anchor 40 | predict_fn = lambda xx: clf.predict_proba(xx) 41 | explainer = AnchorTabular(predict_fn, feature_names) 42 | explainer.fit(x, disc_perc=(25, 50, 75)) 43 | 44 | exp_sub_lst = [] 45 | for i in tqdm(range(len(self.ano_idx))): 46 | ano = x[self.ano_idx[i]] 47 | explanation = explainer.explain(ano, threshold=0.95) 48 | anchor = explanation['anchor'] 49 | f_sub = [] 50 | for a in anchor: 51 | for item in a.split(" "): 52 | if item.startswith("A"): 53 | item = int(item[1:]) 54 | f_sub.append(item) 55 | # print(anchor, f_sub) 56 | if len(f_sub) == 0: 57 | f_sub = np.arange(n_f) 58 | exp_sub_lst.append(f_sub) 59 | 60 | return exp_sub_lst 61 | 62 | 63 | import pandas as pd 64 | path = "../data/00-pima.csv" 65 | df = pd.read_csv(path) 66 | X = df.values[:, :-1] 67 | y = np.array(df.values[:, -1], dtype=int) 68 | model = Anchor() 69 | exp_sub_lst = model.fit(X, y) 70 | print(len(exp_sub_lst)) -------------------------------------------------------------------------------- /data_od_evaluation/ionosphere_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1,[9] 3 | 3,"[1, 2, 3, 5, 6, 7, 8, 9]" 4 | 5,[2] 5 | 7,"[0, 8]" 6 | 9,"[0, 3]" 7 | 11,"[3, 4, 8]" 8 | 13,"[6, 7, 9]" 9 | 15,[9] 10 | 17,[9] 11 | 19,"[0, 1, 4, 5]" 12 | 21,"[0, 3, 4]" 13 | 23,"[1, 3, 4, 5, 7, 9]" 14 | 25,[1] 15 | 27,[7] 16 | 29,"[1, 4]" 17 | 31,[4] 18 | 33,"[2, 3, 5, 7, 8, 9]" 19 | 35,[4] 20 | 37,"[1, 7, 9]" 21 | 39,"[1, 6]" 22 | 41,"[1, 5, 9]" 23 | 43,"[7, 8]" 24 | 45,"[0, 4, 6]" 25 | 47,"[0, 5, 6]" 26 | 49,"[2, 3, 5, 7, 8, 9]" 27 | 51,[9] 28 | 53,[4] 29 | 55,[0] 30 | 57,"[4, 7]" 31 | 59,[4] 32 | 61,"[0, 2, 3, 4, 6]" 33 | 63,[6] 34 | 65,"[6, 8]" 35 | 67,"[6, 7]" 36 | 69,"[1, 8]" 37 | 71,"[1, 7]" 38 | 73,"[1, 6]" 39 | 75,[6] 40 | 77,"[1, 4, 5]" 41 | 79,"[1, 2, 4, 8]" 42 | 81,"[2, 3, 5, 6, 7, 8]" 43 | 83,[1] 44 | 85,"[0, 6, 7, 8, 9]" 45 | 87,"[2, 4, 6, 9]" 46 | 89,"[0, 1, 6]" 47 | 91,"[2, 5, 8]" 48 | 93,"[0, 5, 6]" 49 | 95,[1] 50 | 98,"[1, 2, 4]" 51 | 100,"[0, 1, 2, 3, 5]" 52 | 102,"[0, 2, 7]" 53 | 104,"[0, 1, 2, 6, 8]" 54 | 106,[7] 55 | 108,"[1, 4]" 56 | 110,"[0, 1, 4, 7]" 57 | 112,"[0, 7]" 58 | 114,"[0, 6]" 59 | 116,[3] 60 | 118,"[2, 5, 6, 8]" 61 | 120,"[0, 2, 8]" 62 | 122,[6] 63 | 124,[7] 64 | 126,[8] 65 | 128,[8] 66 | 130,[5] 67 | 132,"[4, 9]" 68 | 134,"[0, 4]" 69 | 136,"[0, 5, 6]" 70 | 138,[2] 71 | 140,"[2, 5, 6, 8]" 72 | 142,"[1, 4, 5]" 73 | 144,"[0, 1]" 74 | 146,[2] 75 | 148,"[1, 2, 9]" 76 | 150,"[0, 1, 6, 7]" 77 | 152,"[0, 3]" 78 | 154,"[0, 3]" 79 | 156,"[0, 3]" 80 | 158,"[0, 3]" 81 | 160,"[0, 3]" 82 | 162,[3] 83 | 164,"[1, 9]" 84 | 166,"[2, 7]" 85 | 168,"[2, 3]" 86 | 170,"[1, 2, 7, 8]" 87 | 172,"[4, 9]" 88 | 174,"[0, 6, 8]" 89 | 176,"[2, 3, 4, 5]" 90 | 178,"[0, 3]" 91 | 180,"[0, 5, 6]" 92 | 182,"[2, 4]" 93 | 184,"[0, 2, 4, 8]" 94 | 186,"[0, 8]" 95 | 188,[3] 96 | 190,"[1, 2, 5, 6, 8]" 97 | 192,"[1, 2, 6, 8]" 98 | 194,"[2, 3, 5]" 99 | 196,"[1, 4]" 100 | 198,"[2, 3, 5, 
7, 9]" 101 | 200,"[0, 2, 3, 4, 5, 6, 7, 8, 9]" 102 | 202,[5] 103 | 204,[1] 104 | 206,"[0, 5, 6]" 105 | 208,"[0, 4, 6]" 106 | 210,[6] 107 | 212,"[3, 7, 8, 9]" 108 | 214,"[2, 3, 4]" 109 | 216,"[4, 7]" 110 | 218,"[0, 1, 4, 5]" 111 | 220,"[3, 4]" 112 | 222,[7] 113 | 224,"[0, 6, 8]" 114 | 226,[6] 115 | 228,"[2, 8]" 116 | 230,"[0, 6, 8]" 117 | 232,"[2, 4]" 118 | 234,[4] 119 | 236,[1] 120 | 238,"[0, 3]" 121 | 240,"[2, 3, 8]" 122 | 242,[7] 123 | 244,[5] 124 | 246,"[1, 2, 3, 8]" 125 | 248,"[0, 2, 7]" 126 | 250,[5] 127 | 252,"[0, 5, 6]" 128 | -------------------------------------------------------------------------------- /data_od_evaluation/letter_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1500,"[1, 2, 6, 9]" 3 | 1501,"[1, 4, 6, 8, 9]" 4 | 1502,"[2, 4, 5]" 5 | 1503,"[1, 3, 4, 6]" 6 | 1504,"[1, 4, 6, 9]" 7 | 1505,"[1, 4, 9]" 8 | 1506,"[1, 4, 7, 8]" 9 | 1507,"[4, 8, 9]" 10 | 1508,"[1, 2, 8, 9]" 11 | 1509,"[1, 6, 8, 9]" 12 | 1510,"[1, 3, 6, 7, 8]" 13 | 1511,"[2, 3, 4, 5, 7]" 14 | 1512,"[8, 9]" 15 | 1513,"[2, 3, 5, 6, 7]" 16 | 1514,"[0, 4, 5, 6, 7]" 17 | 1515,"[1, 3, 6]" 18 | 1516,"[2, 3, 4, 6]" 19 | 1517,"[2, 4, 5]" 20 | 1518,"[2, 3, 4, 6]" 21 | 1519,[8] 22 | 1520,"[6, 7, 8]" 23 | 1521,"[0, 1, 2, 4, 6]" 24 | 1522,"[7, 8, 9]" 25 | 1523,"[1, 2, 4, 6, 9]" 26 | 1524,"[2, 6, 8]" 27 | 1525,"[3, 5, 6, 7]" 28 | 1526,"[1, 3, 5]" 29 | 1527,"[1, 7]" 30 | 1528,"[1, 4, 9]" 31 | 1529,"[2, 4, 7, 8, 9]" 32 | 1530,"[2, 8, 9]" 33 | 1531,"[2, 3, 5, 6]" 34 | 1532,"[1, 3, 6]" 35 | 1533,"[7, 9]" 36 | 1534,"[6, 7, 8]" 37 | 1535,"[2, 4, 5]" 38 | 1536,"[6, 7, 9]" 39 | 1537,"[6, 8]" 40 | 1538,"[0, 3, 4, 6]" 41 | 1539,"[2, 4, 7, 8]" 42 | 1540,"[1, 7, 8, 9]" 43 | 1541,"[1, 4, 6]" 44 | 1542,"[1, 2, 6, 8]" 45 | 1543,"[1, 2, 4, 5, 7]" 46 | 1544,"[6, 7, 8]" 47 | 1545,"[6, 8]" 48 | 1546,"[0, 2, 5, 6]" 49 | 1547,"[2, 4, 5, 7, 9]" 50 | 1548,"[1, 2, 5, 6]" 51 | 1549,"[2, 5]" 52 | 1550,[5] 53 | 1551,"[4, 6, 8]" 54 | 1552,"[1, 6, 7, 9]" 55 | 1553,"[1, 3, 6]" 56 | 1554,"[0, 2, 3, 7]" 57 | 1555,"[2, 3, 5]" 58 | 1556,"[3, 4, 9]" 59 | 1557,"[7, 8, 9]" 60 | 1558,"[1, 2, 3, 4, 5, 6]" 61 | 1559,"[8, 9]" 62 | 1560,"[1, 7, 8, 9]" 63 | 1561,"[2, 4, 5]" 64 | 1562,"[0, 2, 4, 5, 6]" 65 | 1563,"[1, 2, 4, 6, 9]" 66 | 1564,"[0, 1, 3, 4, 6]" 67 | 1565,"[2, 4, 9]" 68 | 1566,"[2, 3, 4, 9]" 69 | 1567,"[1, 6, 7, 9]" 70 | 1568,"[1, 2, 6, 8]" 71 | 1569,"[1, 9]" 72 | 1570,"[2, 5]" 73 | 1571,"[7, 8]" 74 | 1572,"[1, 7, 8, 9]" 75 | 1573,"[1, 3, 6]" 76 | 1574,"[5, 7]" 77 | 1575,"[1, 2, 4, 6]" 78 | 1576,"[7, 8, 9]" 79 | 1577,"[1, 6, 7, 9]" 80 | 1578,"[0, 1, 2, 4, 7, 9]" 81 | 1579,"[4, 5, 6, 7, 9]" 82 | 1580,"[2, 4, 5]" 83 | 1581,"[3, 7, 9]" 84 | 1582,"[7, 9]" 85 | 1583,"[1, 3, 4, 6, 9]" 86 | 1584,"[2, 4, 7]" 87 | 1585,"[1, 5]" 88 | 1586,"[0, 2, 3, 5]" 89 | 1587,"[1, 2, 6, 7, 8, 9]" 90 | 1588,"[4, 6, 8]" 91 | 1589,"[2, 4, 5]" 92 | 1590,"[0, 2, 4, 6, 7]" 93 | 1591,"[3, 6, 8]" 94 | 1592,"[1, 6, 8, 9]" 95 | 1593,"[1, 2, 5]" 96 | 1594,"[6, 7]" 97 | 1595,"[1, 2, 5, 6]" 98 | 1596,"[2, 3, 4, 7, 9]" 99 | 1597,"[5, 7]" 100 | 1598,"[1, 2, 6, 7, 8, 9]" 101 | 1599,"[2, 3, 4, 6, 7]" 102 | -------------------------------------------------------------------------------- /data_od_evaluation/letter_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1500,"[2, 3, 9]" 3 | 1501,"[4, 8]" 4 | 1502,[5] 5 | 1503,"[0, 3, 4, 6]" 6 | 1504,"[1, 6, 9]" 7 | 1505,"[1, 3, 4, 5, 9]" 8 | 1506,"[1, 4, 7, 8]" 9 | 1507,"[4, 9]" 10 | 1508,"[1, 
2, 4, 9]" 11 | 1509,"[1, 4, 6, 8, 9]" 12 | 1510,"[3, 4, 5, 6, 8, 9]" 13 | 1511,"[0, 3, 4, 5, 6]" 14 | 1512,"[0, 8]" 15 | 1513,"[2, 3, 4, 5, 6]" 16 | 1514,"[0, 3, 4, 6, 7]" 17 | 1515,"[0, 1, 3, 5, 6]" 18 | 1516,"[0, 4]" 19 | 1517,"[0, 4, 5, 6]" 20 | 1518,"[2, 3, 4]" 21 | 1519,[8] 22 | 1520,"[6, 7, 8]" 23 | 1521,"[0, 1, 2, 4, 6, 9]" 24 | 1522,"[0, 9]" 25 | 1523,"[2, 4, 5, 6, 9]" 26 | 1524,"[1, 5, 6, 8]" 27 | 1525,"[3, 5, 6, 7]" 28 | 1526,"[0, 1, 2, 5]" 29 | 1527,"[1, 2, 3, 4, 7]" 30 | 1528,"[4, 9]" 31 | 1529,"[2, 4, 5, 6, 7, 8]" 32 | 1530,"[2, 4, 8, 9]" 33 | 1531,"[3, 5, 6]" 34 | 1532,"[0, 3, 4, 6]" 35 | 1533,"[1, 3, 5, 7, 9]" 36 | 1534,"[3, 4, 5, 6, 7]" 37 | 1535,"[2, 4, 9]" 38 | 1536,"[1, 7, 9]" 39 | 1537,"[3, 4, 6, 8]" 40 | 1538,"[0, 4, 7]" 41 | 1539,"[0, 4]" 42 | 1540,"[1, 7, 8, 9]" 43 | 1541,"[0, 1, 4, 6, 7]" 44 | 1542,"[1, 2, 4, 5, 6, 8]" 45 | 1543,"[1, 2, 4, 5]" 46 | 1544,"[2, 6, 7, 8, 9]" 47 | 1545,[6] 48 | 1546,"[0, 2, 5, 6, 7, 8]" 49 | 1547,"[0, 4]" 50 | 1548,"[2, 5]" 51 | 1549,"[2, 4, 5, 9]" 52 | 1550,[5] 53 | 1551,"[4, 9]" 54 | 1552,"[6, 9]" 55 | 1553,"[0, 1, 5, 6, 7]" 56 | 1554,"[0, 1, 2, 3, 7]" 57 | 1555,"[0, 3, 5, 8]" 58 | 1556,"[3, 4, 9]" 59 | 1557,"[3, 5, 6, 7, 9]" 60 | 1558,"[2, 3, 4, 5, 6, 9]" 61 | 1559,"[0, 8, 9]" 62 | 1560,"[0, 1, 5]" 63 | 1561,"[4, 9]" 64 | 1562,"[0, 2, 4, 5, 6]" 65 | 1563,"[2, 4, 5, 6, 9]" 66 | 1564,"[0, 3, 6, 7]" 67 | 1565,"[0, 1, 2, 6, 9]" 68 | 1566,[9] 69 | 1567,"[1, 4, 5, 7]" 70 | 1568,"[1, 2, 3, 4, 7, 8]" 71 | 1569,"[0, 1]" 72 | 1570,[5] 73 | 1571,"[4, 7]" 74 | 1572,"[0, 4, 9]" 75 | 1573,"[0, 1]" 76 | 1574,"[7, 8]" 77 | 1575,"[0, 1, 4, 6]" 78 | 1576,"[4, 7, 9]" 79 | 1577,"[1, 2, 3, 4, 7, 8, 9]" 80 | 1578,"[0, 1, 3, 4, 9]" 81 | 1579,"[0, 4, 7]" 82 | 1580,"[1, 2, 5, 6, 8, 9]" 83 | 1581,"[0, 7, 8, 9]" 84 | 1582,"[4, 9]" 85 | 1583,"[1, 3, 4, 6, 8, 9]" 86 | 1584,"[2, 3, 4, 7]" 87 | 1585,"[1, 5]" 88 | 1586,"[0, 2]" 89 | 1587,"[2, 3, 7, 8, 9]" 90 | 1588,"[2, 4, 5, 6, 9]" 91 | 1589,"[2, 5, 6, 7]" 92 | 1590,"[2, 4]" 93 | 1591,"[3, 4, 8]" 94 | 1592,"[1, 3, 4, 6, 8, 9]" 95 | 1593,"[3, 5]" 96 | 1594,"[5, 7]" 97 | 1595,"[1, 2, 5, 6, 9]" 98 | 1596,[9] 99 | 1597,"[4, 7]" 100 | 1598,"[2, 3, 9]" 101 | 1599,"[0, 2, 3, 4, 6, 7]" 102 | -------------------------------------------------------------------------------- /data_od_evaluation/ionosphere_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1,"[2, 9]" 3 | 3,"[1, 2, 3, 7, 9]" 4 | 5,"[2, 9]" 5 | 7,"[6, 8]" 6 | 9,"[2, 3]" 7 | 11,"[0, 3, 6, 8]" 8 | 13,"[0, 6, 9]" 9 | 15,"[4, 9]" 10 | 17,[5] 11 | 19,"[1, 6, 8]" 12 | 21,"[0, 1, 2, 7]" 13 | 23,"[4, 7, 9]" 14 | 25,"[0, 1]" 15 | 27,[7] 16 | 29,"[1, 4, 9]" 17 | 31,"[0, 4, 9]" 18 | 33,"[0, 3, 5, 8, 9]" 19 | 35,"[0, 4, 7]" 20 | 37,"[0, 1, 4, 8, 9]" 21 | 39,"[0, 1, 6]" 22 | 41,"[0, 1, 5, 7]" 23 | 43,"[0, 7, 8]" 24 | 45,"[0, 6, 8]" 25 | 47,"[0, 1, 5, 6]" 26 | 49,"[2, 5, 7]" 27 | 51,"[4, 7, 9]" 28 | 53,[4] 29 | 55,[0] 30 | 57,"[4, 7]" 31 | 59,"[1, 2, 4, 9]" 32 | 61,[4] 33 | 63,"[1, 2, 4, 6, 7]" 34 | 65,"[0, 6, 9]" 35 | 67,"[0, 1, 3, 6, 7]" 36 | 69,"[0, 8]" 37 | 71,"[1, 4, 5, 6, 7, 8]" 38 | 73,[6] 39 | 75,[6] 40 | 77,"[1, 4]" 41 | 79,"[1, 2, 4]" 42 | 81,"[0, 1, 5, 6, 7]" 43 | 83,[1] 44 | 85,"[7, 9]" 45 | 87,"[2, 5]" 46 | 89,[6] 47 | 91,"[5, 6, 8]" 48 | 93,[6] 49 | 95,"[0, 1, 2, 3]" 50 | 98,"[1, 2, 7]" 51 | 100,"[0, 1, 3]" 52 | 102,"[2, 9]" 53 | 104,"[2, 6, 7, 8]" 54 | 106,"[4, 7]" 55 | 108,"[1, 4, 5, 7]" 56 | 110,"[6, 7]" 57 | 112,"[0, 7, 8]" 58 | 114,[6] 59 | 116,[8] 60 | 118,[5] 61 | 120,"[8, 9]" 
62 | 122,"[6, 7]" 63 | 124,"[7, 9]" 64 | 126,"[2, 8]" 65 | 128,"[2, 4, 8, 9]" 66 | 130,"[0, 5, 6]" 67 | 132,"[4, 8, 9]" 68 | 134,"[0, 3, 4]" 69 | 136,"[0, 1, 5, 6]" 70 | 138,"[2, 3]" 71 | 140,"[0, 5]" 72 | 142,"[0, 1, 5]" 73 | 144,[1] 74 | 146,[7] 75 | 148,"[0, 1, 2, 8, 9]" 76 | 150,[9] 77 | 152,"[2, 3]" 78 | 154,"[2, 3]" 79 | 156,"[2, 3]" 80 | 158,"[2, 3]" 81 | 160,"[2, 3]" 82 | 162,[3] 83 | 164,"[1, 9]" 84 | 166,"[6, 7]" 85 | 168,"[2, 3]" 86 | 170,"[1, 2, 7, 8]" 87 | 172,"[2, 9]" 88 | 174,"[6, 7]" 89 | 176,"[2, 4]" 90 | 178,"[2, 3]" 91 | 180,"[6, 9]" 92 | 182,"[1, 2, 4, 9]" 93 | 184,"[4, 9]" 94 | 186,[8] 95 | 188,"[0, 1, 2, 3, 6]" 96 | 190,"[1, 2, 3, 4, 6, 8]" 97 | 192,"[1, 2, 6]" 98 | 194,"[1, 5, 6]" 99 | 196,"[1, 2, 4]" 100 | 198,"[2, 7, 9]" 101 | 200,"[4, 9]" 102 | 202,[5] 103 | 204,"[1, 6, 7]" 104 | 206,"[0, 1, 6]" 105 | 208,"[6, 9]" 106 | 210,"[1, 6]" 107 | 212,"[0, 3, 9]" 108 | 214,"[0, 2, 3, 4]" 109 | 216,"[0, 4, 6, 7]" 110 | 218,"[1, 6]" 111 | 220,"[0, 1, 5]" 112 | 222,"[2, 7]" 113 | 224,"[6, 8]" 114 | 226,"[6, 8]" 115 | 228,"[0, 2, 7]" 116 | 230,"[2, 3]" 117 | 232,[2] 118 | 234,"[0, 4]" 119 | 236,[1] 120 | 238,"[2, 3]" 121 | 240,"[2, 3]" 122 | 242,"[3, 7]" 123 | 244,"[0, 5]" 124 | 246,"[1, 8]" 125 | 248,"[2, 9]" 126 | 250,"[5, 7]" 127 | 252,"[0, 5, 6, 7]" 128 | -------------------------------------------------------------------------------- /data_od_evaluation/optdigits_pca_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5066,"[0, 4, 9]" 3 | 5067,[6] 4 | 5068,[6] 5 | 5069,"[0, 6]" 6 | 5070,[6] 7 | 5071,[6] 8 | 5072,[6] 9 | 5073,"[2, 6]" 10 | 5074,[6] 11 | 5075,[6] 12 | 5076,[6] 13 | 5077,[6] 14 | 5078,[6] 15 | 5079,[6] 16 | 5080,[6] 17 | 5081,"[1, 3, 6, 7]" 18 | 5082,[6] 19 | 5083,[6] 20 | 5084,[6] 21 | 5085,"[0, 6]" 22 | 5086,[6] 23 | 5087,[6] 24 | 5088,[6] 25 | 5089,[6] 26 | 5090,"[0, 2, 6]" 27 | 5091,[6] 28 | 5092,"[0, 3, 6]" 29 | 5093,"[0, 3, 6]" 30 | 5094,[6] 31 | 5095,[6] 32 | 5096,[6] 33 | 5097,"[0, 6]" 34 | 5098,[6] 35 | 5099,[6] 36 | 5100,[6] 37 | 5101,[6] 38 | 5102,"[1, 3, 6, 7]" 39 | 5103,[6] 40 | 5104,[6] 41 | 5105,"[0, 2, 6]" 42 | 5106,"[0, 2, 3, 6, 8]" 43 | 5107,[6] 44 | 5108,[3] 45 | 5109,[6] 46 | 5110,[6] 47 | 5111,[6] 48 | 5112,[6] 49 | 5113,[6] 50 | 5114,"[0, 2, 6]" 51 | 5115,[6] 52 | 5116,[6] 53 | 5117,[6] 54 | 5118,"[2, 6]" 55 | 5119,"[0, 6]" 56 | 5120,[6] 57 | 5121,"[0, 1, 2, 3, 6, 8]" 58 | 5122,[6] 59 | 5123,[6] 60 | 5124,[6] 61 | 5125,[6] 62 | 5126,"[0, 1, 2, 6]" 63 | 5127,[6] 64 | 5128,[6] 65 | 5129,"[6, 8]" 66 | 5130,[6] 67 | 5131,[3] 68 | 5132,[6] 69 | 5133,[3] 70 | 5134,[3] 71 | 5135,"[2, 3, 6]" 72 | 5136,[6] 73 | 5137,"[0, 2, 5, 6]" 74 | 5138,"[3, 6]" 75 | 5139,[6] 76 | 5140,[6] 77 | 5141,"[0, 3, 6, 7]" 78 | 5142,[6] 79 | 5143,[6] 80 | 5144,"[1, 2, 4, 6, 7]" 81 | 5145,[6] 82 | 5146,"[0, 4, 9]" 83 | 5147,[6] 84 | 5148,"[0, 1, 6, 8]" 85 | 5149,[6] 86 | 5150,"[0, 2, 4, 6, 7]" 87 | 5151,[6] 88 | 5152,"[0, 1, 6]" 89 | 5153,[6] 90 | 5154,[6] 91 | 5155,[6] 92 | 5156,[6] 93 | 5157,"[0, 2, 4, 6, 7]" 94 | 5158,"[3, 8, 9]" 95 | 5159,[6] 96 | 5160,"[0, 3, 6]" 97 | 5161,"[0, 6]" 98 | 5162,"[0, 2, 6]" 99 | 5163,[6] 100 | 5164,[6] 101 | 5165,"[0, 6]" 102 | 5166,[3] 103 | 5167,[6] 104 | 5168,[6] 105 | 5169,[6] 106 | 5170,[6] 107 | 5171,"[0, 3, 6]" 108 | 5172,[0] 109 | 5173,[6] 110 | 5174,"[2, 4, 6]" 111 | 5175,[6] 112 | 5176,[6] 113 | 5177,"[0, 6]" 114 | 5178,[6] 115 | 5179,[3] 116 | 5180,[6] 117 | 5181,[6] 118 | 5182,"[0, 6]" 119 | 5183,[6] 120 | 5184,"[0, 6, 9]" 121 | 5185,"[0, 3, 5, 6, 7, 
8]" 122 | 5186,[6] 123 | 5187,[6] 124 | 5188,"[0, 6]" 125 | 5189,[6] 126 | 5190,"[0, 6]" 127 | 5191,[6] 128 | 5192,[6] 129 | 5193,"[0, 6]" 130 | 5194,[6] 131 | 5195,[6] 132 | 5196,"[6, 7]" 133 | 5197,"[6, 8]" 134 | 5198,[6] 135 | 5199,"[0, 6]" 136 | 5200,"[0, 1, 2, 6]" 137 | 5201,[6] 138 | 5202,"[3, 7, 8, 9]" 139 | 5203,"[0, 4, 9]" 140 | 5204,"[2, 3, 6, 8, 9]" 141 | 5205,"[0, 2, 3, 6, 8]" 142 | 5206,[3] 143 | 5207,[6] 144 | 5208,[9] 145 | 5209,[6] 146 | 5210,"[0, 2, 6, 8]" 147 | 5211,[6] 148 | 5212,[6] 149 | 5213,[6] 150 | 5214,[6] 151 | 5215,[6] 152 | -------------------------------------------------------------------------------- /utils/synthetic_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def generate_data(n_nor, n_ano, dim, rate=0, n_nor_c=1, n_ano_c=1): 6 | if rate > 0: 7 | n_ano = int(n_nor * rate) 8 | 9 | # normal class with "n_nor_c" clusters 10 | x_nor = np.zeros([n_nor, dim]) 11 | for i in range(dim): 12 | size = round(n_nor / n_nor_c) 13 | for j in range(n_nor_c): 14 | loc = np.random.rand() 15 | scale = float(np.random.rand()) 16 | print("Inlier: dim"+str(i), "cluster"+str(j), round(loc, 1), round(scale, 2)) 17 | # last c 18 | if j == n_nor_c - 1: 19 | last_size = n_nor - (n_nor_c-1)*size 20 | x_nor[j * size:, i] = np.random.normal(loc, scale, last_size) 21 | else: 22 | x_nor[j * size: (j+1)*size, i] = np.random.normal(loc, scale, size) 23 | 24 | x_ano = np.zeros([n_ano, dim]) 25 | for i in range(dim): 26 | size = round(n_ano / n_ano_c) 27 | for j in range(n_ano_c): 28 | loc = np.random.rand() + 1 29 | scale = float(np.random.rand()) 30 | print("anomaly: dim"+str(i), "cluster"+str(j), round(loc, 1), round(scale, 2)) 31 | 32 | # last c 33 | if j != n_ano_c - 1: 34 | x_ano[j*size: (j+1)*size, i] = np.random.normal(loc, scale, size) 35 | else: 36 | last_size = n_ano - (n_ano_c - 1) * size 37 | x_ano[j*size:, i] = np.random.normal(loc, scale, last_size) 38 | # x_ano[:, i] = np.random.normal(loc, scale, n_ano) 39 | 40 | x = np.concatenate([x_ano, x_nor], axis=0) 41 | y = np.append(np.ones(n_ano, dtype=int), np.zeros(n_nor, dtype=int)) 42 | matrix = np.concatenate([x, y.reshape([x.shape[0], 1])], axis=1) 43 | 44 | columns = ["A"+str(i) for i in range(dim)] 45 | columns.append("class") 46 | df = pd.DataFrame(matrix, columns=columns) 47 | df['class'] = df['class'].astype(int) 48 | return df 49 | 50 | 51 | # Scal-up Test 52 | dim_range = [8, 32, 128, 512, 2048] 53 | size_range = [1000, 4000, 16000, 64000, 256000] 54 | 55 | 56 | root = "../scal_data/" 57 | for ii, dim in enumerate(dim_range): 58 | n_nor = 995 59 | n_ano = 5 60 | size = n_nor + n_ano 61 | df = generate_data(n_nor=n_nor, n_ano=n_ano, dim=dim) 62 | name = "scal_dim" + str(ii) + "_" + str(size) + "-" + str(dim) + ".csv" 63 | df.to_csv(root + name, index=False) 64 | 65 | for ii, size in enumerate(size_range): 66 | dim = 32 67 | n_nor = int(size * 0.995) 68 | n_ano = int(size * 0.005) 69 | df = generate_data(n_nor=n_nor, n_ano=n_ano, dim=dim) 70 | name = "scal_size" + str(ii) + "_" + str(size) + "-" + str(dim) + ".csv" 71 | df.to_csv(root + name, index=False) 72 | -------------------------------------------------------------------------------- /model_coin/prediction_strength.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.cluster import KMeans 3 | 4 | 5 | def ClosestCenter(point, centroids): 6 | # Find the closest center over all centroids 7 | 
min_index = -1 8 | min_dist = float('inf') 9 | for i in range(len(centroids)): 10 | center = centroids[i] 11 | dist_cur = np.linalg.norm(point - center) 12 | if dist_cur < min_dist: 13 | min_index = i 14 | min_dist = dist_cur 15 | 16 | return min_index 17 | 18 | 19 | def PredictionStrength(data_test, test_labels, train_centers, c): 20 | # Compute prediction strength under c clusters 21 | pred_strength = np.zeros(c) 22 | for cc in range(c): 23 | num_cc = test_labels.tolist().count(cc) 24 | count = 0. 25 | for i in range(len(test_labels)-1): 26 | for j in range(i+1, len(test_labels)): 27 | if test_labels[i] == test_labels[j] == cc: 28 | pi = data_test[i] 29 | pj = data_test[j] 30 | if ClosestCenter(pi, train_centers) == ClosestCenter(pj, train_centers): 31 | count += 1 32 | 33 | if num_cc <= 1: 34 | pred_strength[cc] = float('inf') 35 | else: 36 | pred_strength[cc] = count/(num_cc * (num_cc-1)/2.) 37 | 38 | return min(pred_strength) 39 | 40 | 41 | def optimalK(data, num_fold, maxClusters=5, THRE_PS=0.90): 42 | # Find the best number of clusters using prediction strength 43 | num_data = data.shape[0] 44 | num_feat = data.shape[1] 45 | 46 | pred_strength_avg = np.zeros(maxClusters+1) 47 | for nf in range(num_fold): 48 | # Split into training and testing samples 49 | inds_train = np.random.choice(num_data, int(num_data*0.5), replace=False) 50 | inds_test = list(set(range(num_data)).difference(inds_train)) 51 | data_train = data[inds_train] 52 | data_test = data[inds_test] 53 | 54 | pred_strength_cur = np.zeros(maxClusters+1) 55 | for c in range(1, maxClusters+1): 56 | train_cluster = KMeans(n_clusters=c).fit(data_train) 57 | test_cluster = KMeans(n_clusters=c).fit(data_test) 58 | pred_strength_cur[c] = PredictionStrength(data_test, test_cluster.labels_, train_cluster.cluster_centers_, c) 59 | 60 | pred_strength_avg += pred_strength_cur 61 | 62 | pred_strength_avg /= num_fold 63 | # print("Prediction Strength vec: ", pred_strength_avg) 64 | 65 | # the largest k whose averaged prediction strength exceeds the threshold 66 | k_optimal = max([i for i, j in enumerate(pred_strength_avg) if j > THRE_PS]) 67 | 68 | return k_optimal 69 | 70 | 71 | # if __name__ == "__main__": 72 | # from sklearn.datasets import make_blobs 73 | # import matplotlib.pyplot as plt 74 | # 75 | # x, y = make_blobs(1000, n_features=5, centers=3) 76 | # plt.scatter(x[:, 0], x[:, 1]) 77 | # plt.show() 78 | # 79 | # k = optimalK(x, 10) 80 | # print('Optimal k is: ', k) -------------------------------------------------------------------------------- /data_od_evaluation/ionosphere_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 1,"[2, 5, 9]" 3 | 3,"[2, 3, 6, 7, 8]" 4 | 5,"[0, 2, 3, 8]" 5 | 7,"[2, 3, 8]" 6 | 9,"[0, 1, 3, 6]" 7 | 11,"[0, 8]" 8 | 13,"[0, 2, 5, 6, 7, 8]" 9 | 15,"[2, 5, 6, 7, 8, 9]" 10 | 17,[9] 11 | 19,"[5, 8]" 12 | 21,"[0, 3, 4]" 13 | 23,"[4, 7, 9]" 14 | 25,"[0, 1, 2, 4, 5, 7]" 15 | 27,"[2, 3, 6, 7, 8]" 16 | 29,"[1, 3, 6, 7, 9]" 17 | 31,"[0, 3, 4]" 18 | 33,"[4, 5, 8]" 19 | 35,"[7, 9]" 20 | 37,"[1, 3, 4, 8]" 21 | 39,"[1, 6, 7]" 22 | 41,"[0, 5, 7, 8, 9]" 23 | 43,"[7, 8]" 24 | 45,"[0, 4, 6]" 25 | 47,"[5, 6, 7]" 26 | 49,"[0, 2, 5, 7]" 27 | 51,"[4, 7, 9]" 28 | 53,"[2, 3, 4, 6]" 29 | 55,[0] 30 | 57,"[3, 4]" 31 | 59,"[1, 2, 3, 6, 8]" 32 | 61,"[2, 6, 7]" 33 | 63,"[4, 6]" 34 | 65,"[1, 2, 7, 8]" 35 | 67,"[5, 6, 7]" 36 | 69,[8] 37 | 71,"[1, 5, 6, 7]" 38 | 73,"[1, 3, 6]" 39 | 75,[6] 40 | 77,[4] 41 | 79,"[2, 4]" 42 | 81,"[2, 5, 6, 7, 8]" 43 | 83,"[1, 5]" 44 | 85,"[1, 9]" 45 | 87,"[2, 6, 8, 9]" 46 | 89,"[0, 1, 8]" 47 | 91,"[4, 5, 6, 8]" 48 | 93,"[2, 3, 4, 6]" 49 | 95,"[0, 1]" 50 | 98,"[1, 2, 5, 6, 7]" 51 | 100,"[1, 5]" 52 | 
102,"[2, 4, 6, 7, 8, 9]" 53 | 104,"[2, 5, 8]" 54 | 106,"[3, 4]" 55 | 108,"[1, 4, 5, 7, 9]" 56 | 110,"[1, 2, 5, 7, 9]" 57 | 112,"[5, 8]" 58 | 114,"[0, 1, 3, 6]" 59 | 116,"[4, 7, 8]" 60 | 118,"[2, 5, 6, 7, 8]" 61 | 120,"[7, 8]" 62 | 122,"[6, 7, 9]" 63 | 124,"[4, 5, 6, 7]" 64 | 126,"[2, 6, 8, 9]" 65 | 128,"[2, 6, 7, 8, 9]" 66 | 130,"[4, 5, 7, 9]" 67 | 132,"[4, 9]" 68 | 134,"[4, 9]" 69 | 136,"[0, 4]" 70 | 138,"[0, 2, 3, 4, 6]" 71 | 140,[5] 72 | 142,"[1, 4, 5, 7, 9]" 73 | 144,"[1, 7]" 74 | 146,"[0, 2, 5, 7]" 75 | 148,"[1, 2, 3, 9]" 76 | 150,"[1, 7, 9]" 77 | 152,"[0, 2, 3, 7, 8]" 78 | 154,"[0, 1, 3, 8]" 79 | 156,"[0, 3]" 80 | 158,"[0, 2, 3, 6]" 81 | 160,"[0, 3]" 82 | 162,[3] 83 | 164,"[1, 4, 6, 7, 8, 9]" 84 | 166,"[0, 4, 6]" 85 | 168,"[0, 2, 3, 4, 8]" 86 | 170,"[2, 4, 7, 9]" 87 | 172,"[2, 8, 9]" 88 | 174,"[6, 8]" 89 | 176,"[2, 3, 6, 8]" 90 | 178,"[0, 3, 6]" 91 | 180,"[6, 7, 8, 9]" 92 | 182,"[1, 2, 4, 7, 9]" 93 | 184,"[2, 8, 9]" 94 | 186,"[1, 2, 3, 4, 6, 7, 8]" 95 | 188,"[3, 4, 6]" 96 | 190,"[1, 2, 3, 4, 6, 8]" 97 | 192,"[1, 2, 3, 6, 8]" 98 | 194,"[3, 6]" 99 | 196,"[1, 2, 3, 4, 6]" 100 | 198,"[5, 9]" 101 | 200,"[4, 7, 8, 9]" 102 | 202,"[0, 1, 2, 3, 4, 5]" 103 | 204,"[0, 1, 3, 6]" 104 | 206,"[0, 1, 6]" 105 | 208,"[1, 6, 7, 9]" 106 | 210,"[2, 3, 6, 7, 9]" 107 | 212,"[0, 3, 8]" 108 | 214,"[1, 2, 3, 4, 9]" 109 | 216,"[0, 1, 7, 8]" 110 | 218,"[4, 5, 6, 7]" 111 | 220,"[0, 3, 5, 6]" 112 | 222,"[6, 7]" 113 | 224,"[6, 7, 8, 9]" 114 | 226,"[0, 1, 3, 6]" 115 | 228,"[2, 9]" 116 | 230,"[1, 3, 4, 8]" 117 | 232,"[2, 4, 5]" 118 | 234,"[1, 2, 4, 6, 8]" 119 | 236,"[0, 1]" 120 | 238,"[2, 3, 8]" 121 | 240,"[2, 4, 6, 7, 8, 9]" 122 | 242,"[5, 6, 7]" 123 | 244,"[4, 5, 7, 9]" 124 | 246,"[0, 1, 3, 8]" 125 | 248,"[2, 4, 6, 7, 8, 9]" 126 | 250,"[7, 9]" 127 | 252,"[4, 5, 6, 7]" 128 | -------------------------------------------------------------------------------- /model_aton/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script implements an outlier interpretation method of the following paper: 3 | "Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network". in WWW'21. 4 | @ Author: Hongzuo Xu 5 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 6 | """ 7 | 8 | 9 | import numpy as np 10 | import torch 11 | import random, string 12 | import os 13 | mask = ''.join(random.sample(string.ascii_letters, 8)) 14 | 15 | 16 | def min_max_normalize(x): 17 | n, dim = x.shape 18 | x_n = np.zeros(x.shape) 19 | for i in range(dim): 20 | array = x[:, i] 21 | _min, _max = np.min(array), np.max(array) 22 | if _min == _max: 23 | x_n[:, i] = np.zeros(n) 24 | else: 25 | x_n[:, i] = (array - _min) / (_max - _min) 26 | 27 | return x_n 28 | 29 | 30 | class EarlyStopping: 31 | """Early stops the training if validation loss doesn't improve after a given patience.""" 32 | def __init__(self, patience=7, verbose=False, delta=0, path="checkpoints/" + mask + '_checkpoint.pt', trace_func=print): 33 | """ 34 | Args: 35 | patience (int): How long to wait after last time validation loss improved. 36 | Default: 7 37 | verbose (bool): If True, prints a message for each validation loss improvement. 38 | Default: False 39 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 40 | Default: 0 41 | path (str): Path for the checkpoint to be saved to. 42 | Default: 'checkpoint.pt' 43 | trace_func (function): trace print function. 
44 | Default: print 45 | """ 46 | self.patience = patience 47 | self.verbose = verbose 48 | self.counter = 0 49 | self.best_score = None 50 | self.early_stop = False 51 | self.val_loss_min = np.inf 52 | self.delta = delta 53 | self.path = path 54 | self.trace_func = trace_func 55 | 56 | def __call__(self, val_loss, model): 57 | score = -val_loss 58 | if self.best_score is None: 59 | self.best_score = score 60 | self.save_checkpoint(val_loss, model) 61 | elif score < self.best_score + self.delta: 62 | self.counter += 1 63 | # self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}') 64 | if self.counter >= self.patience: 65 | self.early_stop = True 66 | else: 67 | self.best_score = score 68 | self.save_checkpoint(val_loss, model) 69 | self.counter = 0 70 | 71 | def save_checkpoint(self, val_loss, model): 72 | """Saves the model when the validation loss decreases.""" 73 | if self.verbose: 74 | self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 75 | torch.save(model.state_dict(), self.path) 76 | self.val_loss_min = val_loss -------------------------------------------------------------------------------- /model_iml/IntGrad.py: -------------------------------------------------------------------------------- 1 | import sklearn 2 | import numpy as np 3 | from alibi.explainers import IntegratedGradients 4 | from tensorflow.keras.layers import Dense, Input 5 | from tensorflow.keras.models import Model 6 | from tensorflow.keras.utils import to_categorical 7 | from tensorflow.keras import optimizers 8 | 9 | 10 | class IntGrad: 11 | def __init__(self, n_steps=50, method="gausslegendre"): 12 | """ 13 | 14 | :param n_steps: number of steps in the path integral approximation 15 | :param method: integration method used by alibi's IntegratedGradients (e.g., "gausslegendre") 16 | """ 17 | self.clf_batch_size = 64 18 | self.clf_epochs = 30 19 | 20 | self.n_steps = n_steps 21 | self.method = method 22 | 23 | self.ano_idx = None 24 | self.nor_idx = None 25 | 26 | self.dim = None 27 | return 28 | 29 | def fit(self, x, y): 30 | self.dim = x.shape[1] 31 | x = min_max_normalize(x) 32 | # x = z_score_normalize(x) 33 | y_oh = to_categorical(y, 2) 34 | clf = self.nn_model() 35 | clf.fit(x, y_oh, batch_size=self.clf_batch_size, epochs=self.clf_epochs, verbose=1) 36 | y_pred = clf(x).numpy().argmax(axis=1) 37 | print("Clf model accuracy: [{:.4f}]".format(sklearn.metrics.accuracy_score(y, y_pred))) 38 | 39 | # Initialize IntegratedGradients instance 40 | ig = IntegratedGradients(clf, n_steps=self.n_steps, method=self.method) 41 | 42 | # Calculate attributions for all detected anomalies w.r.t. the outlier class 43 | self.ano_idx = np.where(y == 1)[0] 44 | x_ano = x[self.ano_idx] 45 | # predictions = clf(x_ano).numpy().argmax(axis=1) 46 | predictions = np.ones(len(self.ano_idx), dtype=int) 47 | 48 | self.nor_idx = np.where(y == 0)[0] 49 | x_nor = x[self.nor_idx] 50 | x_nor_avg = np.average(x_nor, axis=0) 51 | baselines = np.array([x_nor_avg] * len(self.ano_idx)) 52 | explanation = ig.explain(x_ano, baselines=baselines, target=predictions) 53 | 54 | fea_weight_lst = explanation.data['attributions'] 55 | return fea_weight_lst 56 | 57 | def nn_model(self): 58 | x_in = Input(shape=(self.dim,)) 59 | x = Dense(10, activation='relu')(x_in) 60 | # x = Dense(10, activation='relu')(x) 61 | x_out = Dense(2, activation='softmax')(x) 62 | nn = Model(inputs=x_in, outputs=x_out) 63 | sgd = optimizers.SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 64 | nn.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) 65 | return nn 66 | 67 | 68 | def min_max_normalize(x): 69 | n, dim = 
x.shape 70 | x_n = np.zeros(x.shape) 71 | for i in range(dim): 72 | array = x[:, i] 73 | _min, _max = np.min(array), np.max(array) 74 | if _min == _max: 75 | x_n[:, i] = np.zeros(n) 76 | else: 77 | x_n[:, i] = (array - _min) / (_max - _min) 78 | 79 | return x_n 80 | 81 | 82 | def z_score_normalize(x): 83 | n, dim = x.shape 84 | x_n = np.zeros(x.shape) 85 | for i in range(dim): 86 | array = x[:, i] 87 | avg = np.average(array) 88 | std = np.std(array) 89 | if std != 0: 90 | x_n[:, i] = (array - avg) / std 91 | else: 92 | x_n[:, i] = array 93 | return x_n 94 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityWhites-od2_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 46,[5] 3 | 98,[0] 4 | 115,"[2, 8]" 5 | 147,"[1, 2, 5]" 6 | 172,"[1, 5]" 7 | 176,[6] 8 | 178,"[1, 2, 5]" 9 | 189,[6] 10 | 204,[0] 11 | 207,"[0, 2]" 12 | 230,"[1, 2, 5, 6]" 13 | 250,[8] 14 | 251,"[0, 3]" 15 | 253,"[0, 5, 8]" 16 | 259,"[0, 6]" 17 | 278,[6] 18 | 282,[6] 19 | 294,[10] 20 | 433,"[1, 2, 3, 4, 5, 10]" 21 | 445,[6] 22 | 496,"[5, 10]" 23 | 499,"[5, 10]" 24 | 526,[6] 25 | 540,"[3, 5]" 26 | 626,"[1, 2, 3, 4, 5, 10]" 27 | 641,[6] 28 | 646,"[2, 5]" 29 | 659,[5] 30 | 662,"[1, 2, 3, 4, 5, 6, 10]" 31 | 687,"[1, 4]" 32 | 690,"[5, 9]" 33 | 702,[6] 34 | 740,[6] 35 | 780,"[2, 5]" 36 | 831,"[1, 6, 8]" 37 | 873,"[0, 6]" 38 | 905,[0] 39 | 906,"[0, 6]" 40 | 908,"[0, 3, 5]" 41 | 914,"[0, 6]" 42 | 948,"[1, 5, 6]" 43 | 991,"[0, 6]" 44 | 993,[6] 45 | 1027,"[1, 3, 5, 6, 9]" 46 | 1029,[6] 47 | 1034,"[4, 9]" 48 | 1040,"[1, 5]" 49 | 1042,"[1, 5]" 50 | 1053,[0] 51 | 1059,"[0, 4, 10]" 52 | 1109,[0] 53 | 1114,[6] 54 | 1152,"[1, 2, 5, 6]" 55 | 1154,"[1, 6, 8]" 56 | 1155,"[5, 8]" 57 | 1229,[0] 58 | 1245,"[0, 1, 2, 3, 5]" 59 | 1293,"[5, 9]" 60 | 1294,"[5, 9]" 61 | 1349,[0] 62 | 1363,"[2, 5]" 63 | 1405,"[1, 5]" 64 | 1417,[6] 65 | 1420,[0] 66 | 1423,[6] 67 | 1430,"[1, 5]" 68 | 1474,[6] 69 | 1483,[6] 70 | 1484,[0] 71 | 1541,"[1, 5, 6]" 72 | 1558,"[1, 5]" 73 | 1559,"[0, 6]" 74 | 1574,"[0, 3, 5]" 75 | 1577,"[1, 5]" 76 | 1579,[6] 77 | 1649,[8] 78 | 1652,[0] 79 | 1664,"[0, 3]" 80 | 1688,[5] 81 | 1690,[0] 82 | 1702,[10] 83 | 1708,"[1, 5]" 84 | 1718,[0] 85 | 1739,"[0, 6]" 86 | 1781,"[5, 6, 8]" 87 | 1817,"[1, 2]" 88 | 1856,"[0, 1]" 89 | 1924,[0] 90 | 1931,[5] 91 | 1951,[1] 92 | 1990,"[1, 5]" 93 | 2050,[0] 94 | 2079,"[1, 5]" 95 | 2116,[6] 96 | 2119,[0] 97 | 2154,"[0, 1, 3]" 98 | 2156,[6] 99 | 2159,[6] 100 | 2225,[3] 101 | 2237,[3] 102 | 2246,[3] 103 | 2275,[3] 104 | 2318,"[2, 3]" 105 | 2337,[5] 106 | 2346,"[2, 3]" 107 | 2372,[6] 108 | 2373,[6] 109 | 2379,"[4, 5, 6, 8]" 110 | 2380,"[4, 5, 6, 8]" 111 | 2386,[6] 112 | 2387,[6] 113 | 2388,[10] 114 | 2400,[0] 115 | 2401,[0] 116 | 2409,"[5, 9]" 117 | 2412,"[4, 6, 7, 9]" 118 | 2413,"[5, 9]" 119 | 2414,"[3, 4, 5, 6, 7, 8, 9, 10]" 120 | 2435,[6] 121 | 2493,"[3, 5]" 122 | 2494,"[3, 5]" 123 | 2502,[0] 124 | 2503,[0] 125 | 2531,"[1, 5]" 126 | 2532,"[1, 5]" 127 | 2589,"[1, 2, 5, 6]" 128 | 2656,[5] 129 | 2818,[5] 130 | 2888,[6] 131 | 2920,[6] 132 | 2935,"[5, 6, 9]" 133 | 3021,[6] 134 | 3050,[5] 135 | 3067,[6] 136 | 3087,[6] 137 | 3109,[6] 138 | 3179,[0] 139 | 3186,"[0, 6]" 140 | 3218,"[4, 5, 10]" 141 | 3265,"[0, 5, 6, 10]" 142 | 3275,[2] 143 | 3307,[5] 144 | 3409,"[2, 5, 8, 10]" 145 | 3417,"[1, 5, 6]" 146 | 3528,[6] 147 | 3559,[6] 148 | 3571,[6] 149 | 3578,[10] 150 | 3650,"[2, 8]" 151 | 3662,"[1, 2, 5, 6]" 152 | 3714,[8] 153 | 3736,"[3, 5, 6, 9, 10]" 154 | 3770,[10] 155 | 3810,[3] 156 | 
3872,"[5, 10]" 157 | 3879,"[1, 5, 6]" 158 | 3901,"[0, 6]" 159 | 3933,"[0, 6]" 160 | 3965,[10] 161 | 3967,[6] 162 | 3973,[6] 163 | 4020,[10] 164 | 4039,[1] 165 | 4074,[6] 166 | 4212,[6] 167 | 4213,"[2, 4, 5, 6, 10]" 168 | 4217,"[2, 5]" 169 | 4222,"[2, 5]" 170 | 4223,[6] 171 | 4253,"[0, 2, 5, 6, 10]" 172 | 4278,[6] 173 | 4389,"[3, 5]" 174 | 4483,[0] 175 | 4508,[5] 176 | 4609,[6] 177 | 4680,"[1, 2, 5]" 178 | 4686,"[1, 2, 5]" 179 | 4745,[5] 180 | 4774,[10] 181 | 4779,"[1, 2, 5, 6]" 182 | 4804,[6] 183 | 4839,[6] 184 | 4878,"[1, 2, 5, 6]" 185 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityWhites-od2_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 46,"[1, 5]" 3 | 98,"[0, 3]" 4 | 115,"[2, 5, 8]" 5 | 147,"[1, 8]" 6 | 172,"[1, 5, 8]" 7 | 176,"[0, 2, 6, 10]" 8 | 178,"[1, 2, 5]" 9 | 189,"[6, 10]" 10 | 204,"[0, 10]" 11 | 207,"[0, 2]" 12 | 230,"[1, 2]" 13 | 250,"[7, 8]" 14 | 251,"[0, 3]" 15 | 253,"[5, 8, 10]" 16 | 259,[5] 17 | 278,[6] 18 | 282,"[1, 5, 6, 9]" 19 | 294,"[0, 1, 10]" 20 | 433,"[1, 4, 5]" 21 | 445,"[3, 6]" 22 | 496,"[2, 5]" 23 | 499,"[2, 5]" 24 | 526,[6] 25 | 540,"[3, 5, 6, 8]" 26 | 626,"[0, 1, 2, 4, 10]" 27 | 641,"[0, 3]" 28 | 646,"[2, 5]" 29 | 659,"[2, 5]" 30 | 662,"[1, 2, 4, 6]" 31 | 687,"[1, 4]" 32 | 690,"[6, 9]" 33 | 702,[2] 34 | 740,[6] 35 | 780,"[2, 5, 6, 8]" 36 | 831,"[3, 8, 9, 10]" 37 | 873,"[0, 6]" 38 | 905,"[0, 2, 5]" 39 | 906,"[0, 2, 5]" 40 | 908,"[0, 3, 7, 8]" 41 | 914,"[0, 5, 6, 10]" 42 | 948,"[1, 6, 8]" 43 | 991,"[0, 2, 6, 9]" 44 | 993,"[2, 5, 6]" 45 | 1027,"[1, 6, 9]" 46 | 1029,"[1, 6]" 47 | 1034,"[0, 4, 6]" 48 | 1040,"[1, 6, 9]" 49 | 1042,[1] 50 | 1053,"[0, 7]" 51 | 1059,"[0, 4, 10]" 52 | 1109,"[0, 7]" 53 | 1114,[6] 54 | 1152,[2] 55 | 1154,"[0, 8, 10]" 56 | 1155,"[0, 8]" 57 | 1229,"[0, 10]" 58 | 1245,"[1, 2, 3, 6]" 59 | 1293,[9] 60 | 1294,[9] 61 | 1349,"[0, 6, 8, 9]" 62 | 1363,"[2, 5]" 63 | 1405,"[4, 5, 9]" 64 | 1417,"[6, 7, 8]" 65 | 1420,"[0, 3]" 66 | 1423,"[0, 2, 6, 9]" 67 | 1430,"[3, 5, 9]" 68 | 1474,"[6, 10]" 69 | 1483,"[6, 10]" 70 | 1484,"[5, 10]" 71 | 1541,"[1, 6, 9, 10]" 72 | 1558,"[1, 2, 5, 10]" 73 | 1559,"[0, 2, 6]" 74 | 1574,"[3, 8]" 75 | 1577,"[1, 8]" 76 | 1579,"[2, 3, 4, 6, 7, 8, 9]" 77 | 1649,"[7, 8]" 78 | 1652,"[0, 2, 3, 9]" 79 | 1664,"[0, 3, 7, 8]" 80 | 1688,[5] 81 | 1690,"[0, 1, 5]" 82 | 1702,"[5, 10]" 83 | 1708,"[1, 8]" 84 | 1718,"[0, 7]" 85 | 1739,"[0, 5, 6, 8]" 86 | 1781,"[3, 6, 8]" 87 | 1817,[2] 88 | 1856,"[0, 1]" 89 | 1924,[5] 90 | 1931,"[1, 5, 6]" 91 | 1951,"[0, 1]" 92 | 1990,[5] 93 | 2050,"[0, 7]" 94 | 2079,"[1, 8]" 95 | 2116,"[6, 7, 8, 9]" 96 | 2119,"[0, 1, 5, 8]" 97 | 2154,"[0, 1]" 98 | 2156,"[3, 5, 9]" 99 | 2159,"[5, 6, 7, 9, 10]" 100 | 2225,"[0, 3, 4]" 101 | 2237,"[0, 2, 3, 4, 7]" 102 | 2246,"[0, 2, 3, 4, 7]" 103 | 2275,"[0, 3, 4, 10]" 104 | 2318,[2] 105 | 2337,[4] 106 | 2346,[2] 107 | 2372,[5] 108 | 2373,"[1, 5, 6, 8]" 109 | 2379,"[4, 5, 8]" 110 | 2380,"[4, 8, 9]" 111 | 2386,"[3, 6]" 112 | 2387,"[3, 6]" 113 | 2388,[5] 114 | 2400,"[0, 7]" 115 | 2401,"[0, 7]" 116 | 2409,"[8, 9]" 117 | 2412,"[0, 3, 4, 7, 8, 9]" 118 | 2413,"[8, 9]" 119 | 2414,"[0, 3, 4, 7, 8, 9]" 120 | 2435,"[5, 6]" 121 | 2493,"[2, 3, 5, 7]" 122 | 2494,"[2, 3, 5, 7]" 123 | 2502,"[0, 2, 5]" 124 | 2503,"[0, 2, 5]" 125 | 2531,"[1, 5]" 126 | 2532,"[1, 5]" 127 | 2589,"[1, 2]" 128 | 2656,"[6, 9]" 129 | 2818,"[1, 5, 8]" 130 | 2888,[6] 131 | 2920,"[3, 5]" 132 | 2935,[5] 133 | 3021,[5] 134 | 3050,"[3, 5, 6, 8, 10]" 135 | 3067,[2] 136 | 3087,"[0, 6]" 
137 | 3109,"[6, 8]" 138 | 3179,"[0, 9]" 139 | 3186,"[0, 2, 6]" 140 | 3218,[4] 141 | 3265,"[0, 10]" 142 | 3275,[2] 143 | 3307,"[0, 5]" 144 | 3409,[3] 145 | 3417,"[1, 6]" 146 | 3528,"[1, 3, 6, 9]" 147 | 3559,[6] 148 | 3571,"[1, 2, 6]" 149 | 3578,[5] 150 | 3650,"[2, 5]" 151 | 3662,"[1, 6, 8]" 152 | 3714,[8] 153 | 3736,"[3, 6, 9, 10]" 154 | 3770,[4] 155 | 3810,"[3, 10]" 156 | 3872,[10] 157 | 3879,"[1, 5, 6, 9]" 158 | 3901,"[1, 2, 4, 5, 10]" 159 | 3933,"[0, 2, 6]" 160 | 3965,[1] 161 | 3967,[2] 162 | 3973,[2] 163 | 4020,[10] 164 | 4039,[1] 165 | 4074,"[2, 3, 5, 6]" 166 | 4212,"[5, 7, 10]" 167 | 4213,"[2, 4, 5, 6]" 168 | 4217,"[2, 9]" 169 | 4222,"[2, 9]" 170 | 4223,"[0, 1, 2, 5, 6, 7, 8]" 171 | 4253,"[0, 1, 2, 5, 8]" 172 | 4278,"[6, 10]" 173 | 4389,[9] 174 | 4483,"[2, 5]" 175 | 4508,"[5, 10]" 176 | 4609,"[1, 2, 6]" 177 | 4680,"[1, 2]" 178 | 4686,"[1, 2]" 179 | 4745,[5] 180 | 4774,[5] 181 | 4779,[2] 182 | 4804,"[2, 5, 6, 7, 10]" 183 | 4839,"[6, 8]" 184 | 4878,"[2, 5]" 185 | -------------------------------------------------------------------------------- /data_od_evaluation/optdigits_pca_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5066,"[0, 4, 6, 8, 9]" 3 | 5067,"[0, 3, 6, 8, 9]" 4 | 5068,"[3, 4, 5, 6, 7, 8]" 5 | 5069,"[0, 2, 4, 6]" 6 | 5070,"[2, 3, 6]" 7 | 5071,"[1, 2, 3, 5, 6]" 8 | 5072,"[2, 3, 4, 5, 6, 7]" 9 | 5073,"[0, 2, 3, 6, 8, 9]" 10 | 5074,[6] 11 | 5075,"[2, 3, 6]" 12 | 5076,"[0, 2, 5, 6, 8]" 13 | 5077,"[0, 3, 6, 8, 9]" 14 | 5078,"[2, 3, 4, 6, 9]" 15 | 5079,"[0, 3, 4, 6, 8]" 16 | 5080,"[2, 3, 5, 6, 7]" 17 | 5081,"[3, 4, 6, 7, 8]" 18 | 5082,"[2, 3, 6]" 19 | 5083,"[2, 3, 6]" 20 | 5084,[6] 21 | 5085,"[0, 6, 8, 9]" 22 | 5086,"[2, 3, 5, 6, 7]" 23 | 5087,"[2, 3, 6]" 24 | 5088,"[0, 6, 8, 9]" 25 | 5089,"[1, 2, 3, 4, 5, 6, 7]" 26 | 5090,"[0, 3, 6, 8, 9]" 27 | 5091,[6] 28 | 5092,"[3, 5, 6, 7, 8]" 29 | 5093,"[0, 3, 6, 8, 9]" 30 | 5094,"[0, 6, 8, 9]" 31 | 5095,"[3, 4, 5, 6, 7, 8]" 32 | 5096,"[1, 4, 6, 7]" 33 | 5097,"[0, 2, 3, 6, 8, 9]" 34 | 5098,"[5, 6, 7, 8]" 35 | 5099,"[2, 3, 6]" 36 | 5100,"[5, 6]" 37 | 5101,"[1, 2, 3, 6, 7]" 38 | 5102,"[3, 4, 5, 6, 7, 8]" 39 | 5103,"[1, 3, 6, 8]" 40 | 5104,"[2, 3, 6]" 41 | 5105,"[0, 2, 6]" 42 | 5106,"[0, 3, 6, 8, 9]" 43 | 5107,"[0, 3, 6, 8]" 44 | 5108,"[0, 3, 6, 8, 9]" 45 | 5109,"[2, 5, 6, 7]" 46 | 5110,"[1, 2, 3, 4, 5, 6, 7]" 47 | 5111,"[2, 3, 6]" 48 | 5112,"[0, 3, 6, 8, 9]" 49 | 5113,"[2, 3, 6]" 50 | 5114,"[0, 2, 4, 6]" 51 | 5115,"[2, 3, 6]" 52 | 5116,"[2, 3, 5, 6, 7]" 53 | 5117,[6] 54 | 5118,"[2, 3, 6]" 55 | 5119,"[0, 4, 6, 9]" 56 | 5120,"[2, 5, 6, 7]" 57 | 5121,"[0, 1, 2, 3, 5, 6, 8, 9]" 58 | 5122,"[2, 3, 5, 6, 7]" 59 | 5123,"[1, 2, 3, 5, 6, 7]" 60 | 5124,"[2, 3, 6]" 61 | 5125,"[2, 3, 6]" 62 | 5126,"[2, 5, 6]" 63 | 5127,"[2, 3, 5, 6, 7]" 64 | 5128,"[2, 3, 6]" 65 | 5129,"[0, 3, 6, 8, 9]" 66 | 5130,"[3, 4, 5, 6, 7, 8]" 67 | 5131,"[1, 2, 3, 6, 7]" 68 | 5132,"[1, 2, 6, 7]" 69 | 5133,"[1, 2, 3, 6, 9]" 70 | 5134,"[2, 3, 4, 6, 7]" 71 | 5135,"[2, 3, 4, 6, 7]" 72 | 5136,[6] 73 | 5137,"[0, 2, 5, 6, 8, 9]" 74 | 5138,"[2, 3, 4, 6, 7]" 75 | 5139,"[2, 3, 6]" 76 | 5140,"[2, 3, 4, 5, 6, 7]" 77 | 5141,"[3, 4, 5, 6, 7]" 78 | 5142,"[0, 3, 6, 8, 9]" 79 | 5143,"[2, 3, 6]" 80 | 5144,"[3, 4, 6, 7, 8]" 81 | 5145,"[2, 3, 6]" 82 | 5146,"[0, 4, 6, 8, 9]" 83 | 5147,"[0, 3, 4, 6, 9]" 84 | 5148,"[0, 1, 4, 6, 8]" 85 | 5149,"[5, 6]" 86 | 5150,"[4, 6, 7, 8]" 87 | 5151,"[0, 6, 8, 9]" 88 | 5152,"[0, 6, 8, 9]" 89 | 5153,"[2, 3, 5, 6, 7]" 90 | 5154,"[2, 3, 6]" 91 | 5155,"[4, 5, 6, 7]" 92 | 5156,"[2, 4, 6, 7, 9]" 93 | 
5157,"[4, 6, 7, 8]" 94 | 5158,"[3, 6, 8, 9]" 95 | 5159,[6] 96 | 5160,"[0, 3, 6, 8, 9]" 97 | 5161,"[0, 3, 6, 8, 9]" 98 | 5162,"[0, 2, 3, 4, 5, 6]" 99 | 5163,"[2, 3, 6]" 100 | 5164,"[1, 2, 6, 7]" 101 | 5165,"[0, 2, 3, 6, 8, 9]" 102 | 5166,"[3, 6, 8, 9]" 103 | 5167,"[0, 3, 6, 8, 9]" 104 | 5168,"[5, 6, 7, 8]" 105 | 5169,"[2, 3, 6]" 106 | 5170,"[5, 6, 8]" 107 | 5171,"[0, 3, 6, 8]" 108 | 5172,"[0, 3, 4, 6, 8]" 109 | 5173,"[2, 3, 5, 6, 7]" 110 | 5174,"[2, 3, 6]" 111 | 5175,"[0, 3, 6, 8, 9]" 112 | 5176,"[1, 2, 3, 4, 5, 6, 7]" 113 | 5177,"[0, 6, 8, 9]" 114 | 5178,"[6, 8]" 115 | 5179,"[3, 4, 5, 6, 7]" 116 | 5180,"[1, 2, 6]" 117 | 5181,"[5, 6]" 118 | 5182,"[0, 3, 6, 8, 9]" 119 | 5183,"[1, 2, 3, 4, 5, 6, 7]" 120 | 5184,"[0, 6, 8, 9]" 121 | 5185,"[3, 5, 6, 7, 8]" 122 | 5186,"[3, 5, 6, 7, 8]" 123 | 5187,"[0, 3, 6, 8, 9]" 124 | 5188,"[0, 6, 8]" 125 | 5189,"[0, 3, 6, 8, 9]" 126 | 5190,"[0, 2, 3, 6, 9]" 127 | 5191,"[2, 3, 5, 6, 7]" 128 | 5192,"[2, 3, 6]" 129 | 5193,"[0, 4, 6]" 130 | 5194,"[6, 8]" 131 | 5195,"[2, 3, 6]" 132 | 5196,"[1, 2, 6, 7]" 133 | 5197,"[0, 3, 6, 8, 9]" 134 | 5198,"[0, 3, 6, 8, 9]" 135 | 5199,"[0, 6, 8, 9]" 136 | 5200,"[2, 5, 6]" 137 | 5201,"[2, 3, 5, 6, 7]" 138 | 5202,"[3, 5, 6, 7, 8, 9]" 139 | 5203,"[0, 4, 6, 8, 9]" 140 | 5204,"[0, 3, 6, 8, 9]" 141 | 5205,"[0, 3, 6, 8, 9]" 142 | 5206,"[3, 6, 9]" 143 | 5207,"[2, 3, 6]" 144 | 5208,"[0, 3, 6, 8, 9]" 145 | 5209,"[2, 3, 4, 6, 7]" 146 | 5210,"[0, 2, 3, 5, 6, 8, 9]" 147 | 5211,"[0, 6, 9]" 148 | 5212,"[0, 3, 6, 8, 9]" 149 | 5213,"[1, 4, 6, 7]" 150 | 5214,[6] 151 | 5215,[6] 152 | -------------------------------------------------------------------------------- /data_od_evaluation/optdigits_pca_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 5066,"[2, 4, 6, 9]" 3 | 5067,"[0, 1, 2, 3, 6, 8]" 4 | 5068,"[1, 3, 4, 6, 7, 8]" 5 | 5069,"[0, 1, 2, 5, 6]" 6 | 5070,"[0, 2, 4, 6]" 7 | 5071,"[1, 3, 6]" 8 | 5072,"[0, 1, 2, 4, 6]" 9 | 5073,"[0, 2, 4, 6, 8]" 10 | 5074,[6] 11 | 5075,"[0, 1, 2, 3, 6, 8]" 12 | 5076,"[0, 2, 4, 6]" 13 | 5077,"[1, 2, 4, 6]" 14 | 5078,"[1, 2, 4, 6]" 15 | 5079,"[0, 1, 2, 3, 6, 8]" 16 | 5080,"[0, 1, 2, 3, 6, 8]" 17 | 5081,"[0, 2, 3, 6, 8]" 18 | 5082,"[0, 1, 2, 3, 6, 8]" 19 | 5083,"[0, 1, 2, 3, 6, 8]" 20 | 5084,"[1, 6]" 21 | 5085,"[0, 1, 2, 5, 6]" 22 | 5086,"[3, 6, 7]" 23 | 5087,"[2, 4, 6]" 24 | 5088,"[0, 6, 7, 8]" 25 | 5089,"[3, 6, 7]" 26 | 5090,"[0, 1, 2, 3, 6, 8]" 27 | 5091,"[1, 6]" 28 | 5092,"[0, 2, 6, 7, 8]" 29 | 5093,"[0, 2, 3, 5, 6]" 30 | 5094,"[0, 2, 6]" 31 | 5095,"[3, 6, 7]" 32 | 5096,"[4, 6, 7]" 33 | 5097,"[0, 2, 6, 8]" 34 | 5098,"[0, 2, 4, 6, 8]" 35 | 5099,"[2, 6]" 36 | 5100,"[1, 3, 6]" 37 | 5101,"[0, 1, 2, 3, 6, 8]" 38 | 5102,"[0, 3, 4, 6, 7, 8]" 39 | 5103,"[3, 6, 8]" 40 | 5104,"[0, 1, 2, 6]" 41 | 5105,"[0, 2, 4, 6]" 42 | 5106,"[0, 1, 2, 3, 6, 8]" 43 | 5107,"[6, 8]" 44 | 5108,"[2, 3, 5, 6, 8]" 45 | 5109,"[6, 9]" 46 | 5110,"[3, 6, 7]" 47 | 5111,"[0, 1, 2, 3, 6, 8]" 48 | 5112,"[0, 1, 2, 3, 6, 8]" 49 | 5113,"[0, 1, 2, 3, 6, 8]" 50 | 5114,"[0, 1, 2, 6]" 51 | 5115,"[0, 1, 2, 3, 6, 8]" 52 | 5116,"[0, 1, 2, 3, 6, 8]" 53 | 5117,"[0, 6, 9]" 54 | 5118,"[0, 1, 2, 3, 6, 8]" 55 | 5119,"[0, 2, 3, 6, 8, 9]" 56 | 5120,"[6, 9]" 57 | 5121,"[0, 1, 2, 3, 6, 8]" 58 | 5122,"[0, 2, 4, 6]" 59 | 5123,"[1, 6]" 60 | 5124,"[0, 1, 2, 6]" 61 | 5125,"[0, 1, 2, 3, 6, 8]" 62 | 5126,"[0, 1, 2, 4, 6]" 63 | 5127,"[1, 2, 6, 7]" 64 | 5128,"[0, 2, 3, 4, 6]" 65 | 5129,"[6, 8]" 66 | 5130,"[1, 3, 4, 6, 7, 8]" 67 | 5131,"[1, 2, 3, 6]" 68 | 5132,"[3, 4, 6, 7]" 69 | 5133,"[1, 2, 3, 6]" 70 | 
5134,"[0, 1, 2, 3, 6, 8]" 71 | 5135,"[2, 3, 4, 5, 6]" 72 | 5136,"[1, 4, 6]" 73 | 5137,"[0, 2, 3, 5, 6]" 74 | 5138,"[2, 3, 4, 5, 6]" 75 | 5139,"[0, 1, 2, 3, 6, 8]" 76 | 5140,"[6, 9]" 77 | 5141,"[3, 4, 6, 7]" 78 | 5142,"[0, 1, 2, 3, 6, 8]" 79 | 5143,"[2, 6, 8, 9]" 80 | 5144,"[0, 3, 4, 6, 7, 8]" 81 | 5145,"[2, 6]" 82 | 5146,"[2, 4, 6, 9]" 83 | 5147,"[0, 1, 2, 4, 6]" 84 | 5148,"[0, 1, 6, 8]" 85 | 5149,"[1, 3, 6]" 86 | 5150,"[0, 3, 4, 6, 7]" 87 | 5151,"[1, 3, 6]" 88 | 5152,"[0, 2, 6, 8]" 89 | 5153,"[6, 8]" 90 | 5154,"[0, 2, 6]" 91 | 5155,"[4, 6, 7]" 92 | 5156,"[1, 2, 6, 7]" 93 | 5157,"[0, 3, 4, 6, 7]" 94 | 5158,"[1, 2, 3, 6, 7, 8, 9]" 95 | 5159,"[0, 1, 2, 5, 6]" 96 | 5160,"[0, 1, 2, 3, 6, 8]" 97 | 5161,"[0, 1, 2, 3, 6, 8]" 98 | 5162,"[0, 2, 3, 5, 6]" 99 | 5163,"[3, 6]" 100 | 5164,"[3, 4, 6, 7]" 101 | 5165,"[0, 2, 6, 8]" 102 | 5166,"[1, 2, 3, 6, 7, 8, 9]" 103 | 5167,"[0, 2, 4, 6]" 104 | 5168,"[5, 6, 9]" 105 | 5169,"[0, 2, 4, 6]" 106 | 5170,"[6, 8]" 107 | 5171,"[0, 3, 6, 8]" 108 | 5172,"[0, 3, 4, 6]" 109 | 5173,"[3, 5, 6]" 110 | 5174,"[0, 2, 4, 6]" 111 | 5175,"[0, 1, 2, 3, 6, 8]" 112 | 5176,"[3, 5, 6, 7]" 113 | 5177,"[0, 1, 2, 5, 6]" 114 | 5178,"[1, 2, 6]" 115 | 5179,"[3, 6, 7]" 116 | 5180,"[1, 3, 6]" 117 | 5181,"[1, 3, 6]" 118 | 5182,"[3, 6, 9]" 119 | 5183,"[0, 2, 4, 6]" 120 | 5184,"[0, 1, 2, 6, 9]" 121 | 5185,"[0, 3, 5, 6, 8, 9]" 122 | 5186,"[1, 3, 5, 6, 7, 8]" 123 | 5187,"[0, 1, 2, 3, 6, 8]" 124 | 5188,"[0, 2, 4, 6, 8]" 125 | 5189,"[0, 2, 4, 6, 8]" 126 | 5190,"[0, 1, 2, 4, 6]" 127 | 5191,"[0, 2, 3, 5, 6, 7]" 128 | 5192,"[0, 1, 2, 3, 6, 8]" 129 | 5193,"[0, 1, 4, 6]" 130 | 5194,"[1, 2, 6]" 131 | 5195,"[0, 1, 2, 3, 6, 8]" 132 | 5196,"[1, 2, 6, 7]" 133 | 5197,"[0, 3, 6, 7, 8, 9]" 134 | 5198,"[6, 8]" 135 | 5199,"[0, 6, 8]" 136 | 5200,"[0, 1, 2, 4, 6]" 137 | 5201,"[3, 4, 6, 7]" 138 | 5202,"[1, 2, 3, 6, 7, 8, 9]" 139 | 5203,"[2, 4, 6, 9]" 140 | 5204,"[1, 3, 6, 8, 9]" 141 | 5205,"[0, 1, 2, 3, 6, 8]" 142 | 5206,"[0, 3, 4, 6, 9]" 143 | 5207,"[0, 1, 2, 3, 6, 8]" 144 | 5208,"[0, 2, 3, 4, 6, 8, 9]" 145 | 5209,"[0, 2, 3, 6, 7]" 146 | 5210,"[0, 2, 3, 5, 8, 9]" 147 | 5211,"[0, 2, 4, 6]" 148 | 5212,"[2, 6, 8, 9]" 149 | 5213,"[6, 9]" 150 | 5214,"[1, 6]" 151 | 5215,[6] 152 | -------------------------------------------------------------------------------- /model_iml/SHAP.py: -------------------------------------------------------------------------------- 1 | import shap 2 | import math 3 | import random 4 | import sklearn 5 | import numpy as np 6 | 7 | 8 | class SHAP: 9 | def __init__(self, kernel="rbf", n_sample=100, threshold=0.8): 10 | """ 11 | 12 | :param kernel: clf model svm parameter 13 | :param threshold: threshold is used to filter feature subset for each data, the shap values of selected feature 14 | subspace accounts for [threshold] of the sum of the shap values of feature full space. 
15 | """ 16 | self.ano_idx = None 17 | 18 | self.kernel = kernel 19 | self.threshold = threshold 20 | self.n_sample = n_sample 21 | self.dim = None 22 | return 23 | 24 | def fit(self, x, y): 25 | 26 | self.dim = x.shape[1] 27 | 28 | # metric_lst = [] 29 | # clf_lst = [] 30 | # for model in classifiers.keys(): 31 | # clf = classifiers[model] 32 | # clf.fit(x, y) 33 | # y_pred = clf.predict(x) 34 | # clf_lst.append(clf) 35 | # metric_lst.append(sklearn.metrics.f1_score(y, y_pred)) 36 | # choose_idx = int(np.argmax(metric_lst)) 37 | # clf = clf_lst[choose_idx] 38 | # print("Choosing Clf: [%s]" % list(classifiers.keys())[choose_idx]) 39 | 40 | clf = sklearn.svm.SVC(kernel=self.kernel, probability=True) 41 | clf.fit(x, y) 42 | 43 | y_pred = clf.predict(x) 44 | print("Clf model accuracy: [{:.4f}]".format(sklearn.metrics.accuracy_score(y, y_pred))) 45 | 46 | self.ano_idx = np.where(y == 1)[0] 47 | 48 | # use Kernel SHAP to explain test set predictions 49 | # As instructed by SHAP, Using many background data samples could cause slower run times. 50 | # we use shap.kmeans(data, K) to summarize the background as 100 samples. 51 | 52 | x_kmean = shap.kmeans(x, self.n_sample) 53 | explainer = shap.KernelExplainer(clf.predict_proba, x_kmean, link="logit") 54 | anomaly_shap_values = explainer.shap_values(x[self.ano_idx], nsamples="auto") 55 | 56 | anomaly_shap_values = anomaly_shap_values[1] 57 | return anomaly_shap_values 58 | 59 | def weight2subspace(self, weight, r=0.7, num=-1): 60 | threshold = r * np.sum(weight) 61 | tmp_s = 0 62 | exp_subspace = [] 63 | sorted_idx1 = np.argsort(weight) 64 | sorted_idx = [sorted_idx1[self.dim - i -1] for i in range(self.dim)] 65 | if num != -1: 66 | exp_subspace = sorted_idx[:num] 67 | exp_subspace = list(np.sort(exp_subspace)) 68 | return exp_subspace 69 | 70 | for idx in sorted_idx: 71 | tmp_s += weight[idx] 72 | exp_subspace.append(idx) 73 | if tmp_s >= threshold: 74 | break 75 | exp_subspace = list(np.sort(exp_subspace)) 76 | return exp_subspace 77 | 78 | def weight2subspace_pn(self, weight): 79 | exp_subspace = [] 80 | for i in range(len(weight)): 81 | if weight[i] > 0: 82 | exp_subspace.append(i) 83 | exp_subspace = list(np.sort(exp_subspace)) 84 | return exp_subspace 85 | 86 | def get_exp_subspace(self, fea_weight_lst, w2s_ratio, real_exp_len=None): 87 | exp_subspace_lst = [] 88 | for ii, idx in enumerate(self.ano_idx): 89 | fea_weight = fea_weight_lst[ii] 90 | if w2s_ratio == "real_len": 91 | exp_subspace_lst.append(self.weight2subspace(fea_weight, num=real_exp_len[ii])) 92 | elif w2s_ratio == "auto": 93 | r = math.sqrt(2 / self.dim) 94 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=r)) 95 | elif w2s_ratio == "pn": 96 | exp_subspace_lst.append(self.weight2subspace_pn(fea_weight)) 97 | else: 98 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=w2s_ratio)) 99 | return exp_subspace_lst 100 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # ------------------- path of datasets and annotations ----------------- # 2 | root = '' 3 | eva_root = '' 4 | 5 | 6 | def get_parser(algorithm_name, parser): 7 | if algorithm_name == "aton": 8 | parser.add_argument('--nbrs_num', type=int, default=30, help='') 9 | parser.add_argument('--rand_num', type=int, default=30, help='') 10 | parser.add_argument('--alpha1', type=float, default=0.8, help='triplet loss factor in loss function') 11 | parser.add_argument('--alpha2', 
type=float, default=0.2, help='dis loss factor in loss function') 12 | parser.add_argument('--n_epoch', type=int, default=10, help='') 13 | parser.add_argument('--batch_size', type=int, default=512, help='') 14 | parser.add_argument('--lr', type=float, default=0.1, help='') 15 | parser.add_argument('--n_linear', type=int, default=64, help='') 16 | parser.add_argument('--margin', type=float, default=5., help='') 17 | elif algorithm_name == "shap": 18 | parser.add_argument('--kernel', type=str, default='rbf', help='') 19 | parser.add_argument("--n_sample", type=int, default=100, help='') 20 | parser.add_argument("--threshold", type=int, default=-1, help='') 21 | elif algorithm_name == "lime": 22 | parser.add_argument('--discretize_continuous', type=bool, default=True, help='') 23 | parser.add_argument("--discretizer", type=str, default="quartile", help='') 24 | elif algorithm_name == "intgrad": 25 | parser.add_argument('--n_steps', type=int, default=40, help='') 26 | parser.add_argument('--method', type=str, default="gausslegendre", help='') 27 | elif algorithm_name == "coin": 28 | parser.add_argument('--AUG', type=float, default=10, help='an additional attribute value as augmentation') 29 | parser.add_argument('--ratio_nbr', type=float, default=0.08, 30 | help='controls number of neighbors to use in kneighbors queries') 31 | parser.add_argument('--MIN_CLUSTER_SIZE', type=int, default=5, 32 | help='minimum number of samples required in a cluster') 33 | parser.add_argument('--MAX_NUM_CLUSTER', type=int, default=4, 34 | help='maximum number of clusters for each context') 35 | parser.add_argument('--VAL_TIMES', type=int, default=10, 36 | help='number of iterations for computing prediction strength') 37 | parser.add_argument('--C_SVM', type=float, default=1., help='penalty parameter for svm') 38 | parser.add_argument('--DEFK', type=int, default=0, 39 | help='pre-determined number of clusters in each context (use prediction strength if 0)') 40 | parser.add_argument('--THRE_PS', type=float, default=0.85, 41 | help='threshold for deciding the best cluster value in prediction strength') 42 | elif algorithm_name == "aton_ablation" or algorithm_name == "aton_ablation2" or algorithm_name == "aton_ablation3": 43 | parser.add_argument('--nbrs_num', type=int, default=30, help='') 44 | parser.add_argument('--rand_num', type=int, default=30, help='') 45 | parser.add_argument('--n_epoch', type=int, default=10, help='') 46 | parser.add_argument('--batch_size', type=int, default=64, help='') 47 | parser.add_argument('--lr', type=float, default=0.1, help='') 48 | parser.add_argument('--n_linear', type=int, default=64, help='') 49 | parser.add_argument('--margin', type=float, default=5., help='') 50 | elif algorithm_name == "sinne": 51 | parser.add_argument('--max_level', default='full', help='') 52 | parser.add_argument("--width", type=int, default=10, help='') 53 | parser.add_argument("--ensemble_num", type=int, default=100, help='') 54 | parser.add_argument("--sample_num", type=int, default=8, help='') 55 | parser.add_argument("--pretrain", type=bool, default=False, help='') 56 | parser.add_argument("--verbose", type=bool, default=False, help='') 57 | else: 58 | raise NotImplementedError("not supported algorithm") 59 | return parser 60 | 61 | -------------------------------------------------------------------------------- /data_od_evaluation/wineQualityWhites-od2_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 46,"[1, 4, 5, 8]" 3 | 
98,"[0, 2, 5, 8]" 4 | 115,"[1, 2, 4, 5, 8]" 5 | 147,"[1, 4, 8]" 6 | 172,"[1, 5, 8]" 7 | 176,"[0, 6, 8, 10]" 8 | 178,"[1, 2, 4, 10]" 9 | 189,"[3, 6, 9, 10]" 10 | 204,"[0, 2, 4, 8, 9]" 11 | 207,"[0, 2]" 12 | 230,"[1, 2, 4, 8]" 13 | 250,"[8, 9]" 14 | 251,"[0, 3, 4, 6, 8]" 15 | 253,"[2, 5, 8]" 16 | 259,"[3, 5, 10]" 17 | 278,"[1, 6, 8]" 18 | 282,"[1, 6, 9]" 19 | 294,"[0, 1, 4, 10]" 20 | 433,"[1, 2, 4]" 21 | 445,"[0, 3, 6, 8, 10]" 22 | 496,"[1, 2, 5]" 23 | 499,"[1, 2, 5]" 24 | 526,"[5, 10]" 25 | 540,"[3, 6, 8, 9, 10]" 26 | 626,"[0, 1, 2, 4, 8]" 27 | 641,"[4, 6, 7, 8, 10]" 28 | 646,"[2, 4, 5, 9, 10]" 29 | 659,"[2, 5]" 30 | 662,"[1, 4, 8]" 31 | 687,"[1, 4]" 32 | 690,"[8, 9]" 33 | 702,"[1, 2]" 34 | 740,"[4, 6, 8, 10]" 35 | 780,"[1, 2, 5, 8]" 36 | 831,"[6, 9, 10]" 37 | 873,"[0, 6, 8]" 38 | 905,"[0, 2, 4, 5, 9]" 39 | 906,"[0, 2, 4, 5, 9]" 40 | 908,"[0, 4, 7, 8]" 41 | 914,"[0, 1, 4, 5, 8]" 42 | 948,"[1, 6, 8]" 43 | 991,"[0, 2, 5, 9]" 44 | 993,"[2, 5, 8]" 45 | 1027,"[0, 1, 2, 6, 9]" 46 | 1029,"[1, 6]" 47 | 1034,"[1, 2, 4, 6]" 48 | 1040,"[1, 5, 9]" 49 | 1042,[1] 50 | 1053,"[0, 2, 4, 9]" 51 | 1059,"[0, 2, 4, 5, 8, 10]" 52 | 1109,"[0, 4]" 53 | 1114,"[1, 6, 8]" 54 | 1152,"[1, 2]" 55 | 1154,"[0, 6, 8, 10]" 56 | 1155,"[0, 6, 8]" 57 | 1229,"[0, 10]" 58 | 1245,"[1, 2, 6, 9]" 59 | 1293,"[1, 5, 9]" 60 | 1294,"[1, 5, 9]" 61 | 1349,"[0, 3, 6, 8, 9]" 62 | 1363,"[1, 2, 4, 5]" 63 | 1405,"[1, 4, 5, 9]" 64 | 1417,"[1, 6]" 65 | 1420,"[0, 2, 7]" 66 | 1423,"[0, 2, 4, 6]" 67 | 1430,"[1, 5, 9]" 68 | 1474,"[4, 6, 8]" 69 | 1483,"[4, 6, 8]" 70 | 1484,"[0, 4, 5, 10]" 71 | 1541,"[1, 6, 9]" 72 | 1558,"[1, 2, 5, 9]" 73 | 1559,"[0, 2, 6, 8, 9]" 74 | 1574,"[0, 2, 4, 8, 9]" 75 | 1577,"[1, 2]" 76 | 1579,"[2, 4, 5, 6, 7, 9, 10]" 77 | 1649,"[2, 8]" 78 | 1652,"[0, 2, 3]" 79 | 1664,"[0, 4, 7]" 80 | 1688,"[5, 8, 10]" 81 | 1690,"[0, 1, 4, 5]" 82 | 1702,"[5, 10]" 83 | 1708,"[1, 8]" 84 | 1718,"[0, 10]" 85 | 1739,"[0, 3, 5, 8, 10]" 86 | 1781,"[4, 6, 8]" 87 | 1817,"[1, 2, 5]" 88 | 1856,"[0, 1, 4]" 89 | 1924,"[0, 2, 4, 5, 9]" 90 | 1931,"[1, 5]" 91 | 1951,"[0, 1]" 92 | 1990,"[1, 5, 8]" 93 | 2050,"[0, 7]" 94 | 2079,"[1, 8]" 95 | 2116,"[3, 6, 9, 10]" 96 | 2119,"[0, 2, 4, 5, 9]" 97 | 2154,"[0, 1, 2, 4]" 98 | 2156,"[0, 5, 9, 10]" 99 | 2159,"[2, 4, 5, 6, 7, 8, 9, 10]" 100 | 2225,"[0, 3, 4]" 101 | 2237,"[3, 4, 6]" 102 | 2246,"[3, 4, 6]" 103 | 2275,"[3, 4, 6]" 104 | 2318,"[0, 2, 5, 7, 10]" 105 | 2337,"[2, 4, 5, 9, 10]" 106 | 2346,"[0, 2, 5, 7, 10]" 107 | 2372,"[2, 5, 8, 10]" 108 | 2373,"[1, 2, 5]" 109 | 2379,"[4, 8, 9]" 110 | 2380,"[4, 8, 9]" 111 | 2386,"[4, 6, 8]" 112 | 2387,"[4, 6, 8]" 113 | 2388,"[0, 3, 5, 10]" 114 | 2400,"[0, 2, 5, 9]" 115 | 2401,"[0, 2, 9]" 116 | 2409,"[8, 9]" 117 | 2412,"[4, 6, 7, 8, 9]" 118 | 2413,"[8, 9]" 119 | 2414,"[4, 8, 9]" 120 | 2435,"[1, 2, 5]" 121 | 2493,"[0, 4, 7]" 122 | 2494,"[0, 4, 7]" 123 | 2502,"[0, 3, 5]" 124 | 2503,"[0, 3, 5]" 125 | 2531,"[1, 5]" 126 | 2532,"[1, 5]" 127 | 2589,"[1, 2, 4]" 128 | 2656,"[2, 5, 9]" 129 | 2818,"[1, 2, 5, 8]" 130 | 2888,"[0, 3, 5]" 131 | 2920,"[0, 2, 3, 5, 8, 10]" 132 | 2935,"[2, 4, 5, 9, 10]" 133 | 3021,"[5, 8, 10]" 134 | 3050,"[2, 5, 8]" 135 | 3067,"[0, 2, 3, 10]" 136 | 3087,"[3, 6]" 137 | 3109,"[4, 6, 8]" 138 | 3179,"[0, 2, 4, 8, 9]" 139 | 3186,"[0, 2, 5, 9]" 140 | 3218,"[4, 8]" 141 | 3265,"[0, 5, 10]" 142 | 3275,"[1, 2]" 143 | 3307,"[0, 5]" 144 | 3409,"[3, 8]" 145 | 3417,"[1, 5, 10]" 146 | 3528,"[1, 6, 9]" 147 | 3559,"[0, 6, 8, 10]" 148 | 3571,"[1, 2, 6, 8]" 149 | 3578,"[5, 10]" 150 | 3650,"[2, 5, 9]" 151 | 3662,"[1, 5, 8]" 152 | 3714,"[0, 8]" 153 | 3736,"[2, 4, 5, 9, 10]" 154 | 
3770,"[4, 9, 10]" 155 | 3810,"[3, 4, 6]" 156 | 3872,"[0, 2, 5, 7, 10]" 157 | 3879,"[1, 6, 9]" 158 | 3901,"[4, 6, 10]" 159 | 3933,"[0, 6, 8, 10]" 160 | 3965,"[0, 1, 2, 4, 9]" 161 | 3967,"[2, 4, 6, 9]" 162 | 3973,"[2, 4, 6, 9]" 163 | 4020,"[2, 4, 5, 10]" 164 | 4039,[1] 165 | 4074,"[1, 2, 4, 5]" 166 | 4212,"[4, 6, 7, 8, 10]" 167 | 4213,"[1, 2, 4, 5]" 168 | 4217,"[1, 2, 4]" 169 | 4222,"[1, 2, 4]" 170 | 4223,"[1, 6, 8]" 171 | 4253,"[0, 1, 2, 5, 8]" 172 | 4278,"[0, 1, 5, 6]" 173 | 4389,"[3, 9, 10]" 174 | 4483,"[0, 2, 4, 5, 9]" 175 | 4508,"[5, 10]" 176 | 4609,"[1, 2, 5]" 177 | 4680,"[1, 2, 4]" 178 | 4686,"[1, 2, 4]" 179 | 4745,[5] 180 | 4774,"[5, 10]" 181 | 4779,"[1, 2]" 182 | 4804,"[2, 3, 5, 10]" 183 | 4839,"[0, 6, 8, 10]" 184 | 4878,"[1, 2, 5]" 185 | -------------------------------------------------------------------------------- /data_od_evaluation/pima_gt_hbos.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,[7] 3 | 2,[1] 4 | 4,"[2, 6]" 5 | 6,"[1, 4]" 6 | 8,"[1, 4]" 7 | 9,"[1, 5, 7]" 8 | 11,"[0, 1, 7]" 9 | 13,[4] 10 | 14,"[1, 4]" 11 | 15,[2] 12 | 16,[4] 13 | 17,[0] 14 | 19,[4] 15 | 22,[1] 16 | 23,[0] 17 | 24,"[0, 1, 2, 4]" 18 | 25,"[0, 4, 7]" 19 | 26,"[1, 7]" 20 | 31,[4] 21 | 37,[7] 22 | 38,[3] 23 | 39,"[3, 4, 6, 7]" 24 | 43,"[2, 4]" 25 | 45,"[1, 6]" 26 | 48,[0] 27 | 53,"[4, 7]" 28 | 56,"[1, 4]" 29 | 61,[7] 30 | 64,[7] 31 | 66,"[2, 3, 7]" 32 | 70,[4] 33 | 72,"[0, 1, 2]" 34 | 78,[2] 35 | 84,"[2, 5]" 36 | 88,"[0, 4]" 37 | 93,[7] 38 | 99,"[3, 4]" 39 | 100,"[1, 6, 7]" 40 | 109,"[1, 3]" 41 | 110,"[1, 4]" 42 | 111,[4] 43 | 114,"[1, 4]" 44 | 115,[7] 45 | 116,[7] 46 | 120,"[1, 3, 4]" 47 | 124,[2] 48 | 125,"[1, 2, 3, 5]" 49 | 128,"[2, 3, 4, 7]" 50 | 129,[7] 51 | 130,"[1, 4]" 52 | 131,"[0, 2, 6]" 53 | 132,"[1, 4]" 54 | 143,"[0, 7]" 55 | 152,"[0, 2, 3, 4, 5, 6, 7]" 56 | 154,"[1, 5]" 57 | 155,"[1, 3, 5, 7]" 58 | 159,[0] 59 | 164,[2] 60 | 165,"[0, 2, 3, 4, 7]" 61 | 170,"[0, 7]" 62 | 171,"[0, 4]" 63 | 175,"[1, 4]" 64 | 177,[5] 65 | 179,[7] 66 | 185,[1] 67 | 186,"[1, 4, 7]" 68 | 187,"[2, 3]" 69 | 188,[4] 70 | 189,[4] 71 | 192,"[1, 7]" 72 | 193,"[0, 5]" 73 | 195,[4] 74 | 197,[3] 75 | 198,"[3, 4]" 76 | 199,[4] 77 | 206,"[1, 4]" 78 | 207,"[1, 2, 7]" 79 | 209,[1] 80 | 213,"[1, 4, 5]" 81 | 214,[4] 82 | 215,[4] 83 | 216,"[3, 4]" 84 | 218,"[3, 6, 7]" 85 | 219,[7] 86 | 220,[4] 87 | 221,[7] 88 | 227,[1] 89 | 230,"[1, 2, 5]" 90 | 231,[4] 91 | 235,[1] 92 | 236,"[1, 4]" 93 | 237,"[1, 2, 5]" 94 | 238,[1] 95 | 242,[2] 96 | 243,"[0, 2, 3, 4, 6]" 97 | 245,"[0, 1, 3, 6, 7]" 98 | 254,[4] 99 | 255,[3] 100 | 259,"[0, 6]" 101 | 261,[2] 102 | 264,[7] 103 | 266,[2] 104 | 269,[2] 105 | 270,"[0, 2, 3, 5, 6, 7]" 106 | 276,"[0, 2, 3]" 107 | 280,[1] 108 | 283,"[1, 2, 7]" 109 | 284,[7] 110 | 287,[4] 111 | 291,"[3, 6]" 112 | 292,[4] 113 | 293,"[2, 3, 4]" 114 | 296,[4] 115 | 298,"[0, 4, 7]" 116 | 300,[2] 117 | 301,"[1, 4]" 118 | 303,"[2, 5]" 119 | 306,"[1, 4, 7]" 120 | 308,[4] 121 | 309,[4] 122 | 312,"[1, 4]" 123 | 314,"[0, 3, 5, 6, 7]" 124 | 317,[1] 125 | 319,"[1, 7]" 126 | 321,"[0, 2, 3]" 127 | 322,[7] 128 | 323,"[0, 2, 3]" 129 | 326,[4] 130 | 328,"[4, 5]" 131 | 332,"[1, 2]" 132 | 337,[7] 133 | 338,[4] 134 | 339,[1] 135 | 349,[1] 136 | 355,"[1, 2, 7]" 137 | 356,"[2, 3, 4]" 138 | 357,"[0, 2, 3]" 139 | 359,"[1, 4]" 140 | 360,"[1, 4]" 141 | 363,[7] 142 | 366,[0] 143 | 369,[2] 144 | 370,[6] 145 | 375,"[4, 7]" 146 | 378,[5] 147 | 386,[7] 148 | 387,[2] 149 | 388,"[4, 7]" 150 | 391,[1] 151 | 394,"[0, 1, 2, 6]" 152 | 397,[3] 153 | 399,[1] 154 | 400,[7] 155 | 402,"[3, 4, 7]" 
156 | 404,[1] 157 | 406,[7] 158 | 408,"[0, 1, 5, 6, 7]" 159 | 409,[4] 160 | 414,[4] 161 | 415,[4] 162 | 417,"[1, 7]" 163 | 419,[4] 164 | 424,[4] 165 | 425,"[1, 4]" 166 | 427,"[1, 4]" 167 | 429,[4] 168 | 435,[2] 169 | 440,[2] 170 | 443,"[0, 6]" 171 | 444,[3] 172 | 445,"[1, 5]" 173 | 448,[3] 174 | 451,[1] 175 | 455,"[0, 1]" 176 | 458,"[4, 7]" 177 | 468,[2] 178 | 476,[4] 179 | 480,[4] 180 | 484,[2] 181 | 485,[4] 182 | 493,"[3, 4, 6, 7]" 183 | 498,"[1, 4, 7]" 184 | 502,[1] 185 | 506,"[1, 4]" 186 | 510,"[0, 7]" 187 | 515,"[1, 4]" 188 | 516,"[4, 7]" 189 | 523,"[0, 7]" 190 | 535,[2] 191 | 539,"[2, 3, 4]" 192 | 540,[4] 193 | 541,[4] 194 | 542,[7] 195 | 545,"[1, 4]" 196 | 546,"[1, 4, 7]" 197 | 560,[7] 198 | 561,"[1, 4]" 199 | 569,[4] 200 | 577,[5] 201 | 579,[3] 202 | 580,"[1, 2, 3]" 203 | 584,[4] 204 | 586,"[1, 7]" 205 | 588,"[1, 4, 7]" 206 | 590,"[0, 2, 3]" 207 | 592,[7] 208 | 595,"[1, 4]" 209 | 598,[1] 210 | 603,"[4, 7]" 211 | 604,"[1, 2]" 212 | 606,"[1, 4]" 213 | 611,"[1, 4]" 214 | 612,[4] 215 | 614,"[0, 4, 7]" 216 | 618,"[0, 2, 3, 6, 7]" 217 | 619,[2] 218 | 630,[7] 219 | 635,[0] 220 | 638,"[0, 4, 5, 6]" 221 | 642,[7] 222 | 646,"[1, 4]" 223 | 647,"[1, 4]" 224 | 648,"[0, 4]" 225 | 655,[4] 226 | 659,[6] 227 | 661,"[1, 2, 3, 6]" 228 | 662,"[1, 2, 3]" 229 | 663,"[0, 2, 3, 4, 7]" 230 | 664,"[0, 2, 3, 7]" 231 | 666,[7] 232 | 667,"[0, 7]" 233 | 675,[1] 234 | 676,[7] 235 | 678,[2] 236 | 681,"[1, 5]" 237 | 683,[0] 238 | 689,"[1, 3, 4, 5, 7]" 239 | 691,"[0, 1, 2]" 240 | 693,"[3, 4, 7]" 241 | 695,[4] 242 | 696,"[1, 4]" 243 | 701,[7] 244 | 702,"[1, 2, 3, 7]" 245 | 706,[5] 246 | 708,"[0, 1, 7]" 247 | 709,[4] 248 | 712,"[0, 5, 7]" 249 | 715,"[1, 4]" 250 | 716,"[1, 4]" 251 | 719,[7] 252 | 722,"[4, 7]" 253 | 730,[7] 254 | 731,[2] 255 | 732,"[1, 4]" 256 | 739,[7] 257 | 740,"[0, 4, 5, 7]" 258 | 743,"[0, 1, 2, 7]" 259 | 746,[5] 260 | 748,"[1, 4]" 261 | 749,"[1, 7]" 262 | 750,"[0, 6]" 263 | 753,"[1, 4]" 264 | 754,"[1, 7]" 265 | 755,"[3, 4, 6, 7]" 266 | 757,[7] 267 | 759,"[1, 7]" 268 | 761,"[1, 3, 5, 7]" 269 | 766,[7] 270 | -------------------------------------------------------------------------------- /model_aton/ATON_ablation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time, math 3 | import torch 4 | import torch.utils.data as Data 5 | import torch.optim as optim 6 | from torch.optim import lr_scheduler 7 | from tqdm import tqdm 8 | 9 | from model_aton.utils import EarlyStopping, min_max_normalize 10 | from model_aton.datasets import MyHardSingleTripletSelector 11 | from model_aton.datasets import SingleTripletDataset 12 | from model_aton.networks import ATONablanet 13 | 14 | 15 | class ATONabla: 16 | """ 17 | ablated version that removes self-attention mechanism 18 | """ 19 | def __init__(self, nbrs_num=30, rand_num=30, 20 | n_epoch=10, batch_size=64, lr=0.1, n_linear=64, margin=5., 21 | verbose=True): 22 | self.verbose = verbose 23 | 24 | self.x = None 25 | self.y = None 26 | self.ano_idx = None 27 | self.dim = None 28 | 29 | self.reason_map = {} 30 | 31 | cuda = torch.cuda.is_available() 32 | self.device = torch.device("cuda" if cuda else "cpu") 33 | if cuda: 34 | torch.cuda.set_device(0) 35 | 36 | self.nbrs_num = nbrs_num 37 | self.rand_num = rand_num 38 | 39 | self.n_epoch = n_epoch 40 | self.batch_size = batch_size 41 | self.lr = lr 42 | self.n_linear = n_linear 43 | self.margin = margin 44 | return 45 | 46 | def fit(self, x, y): 47 | device = self.device 48 | 49 | self.dim = x.shape[1] 50 | x = min_max_normalize(x) 51 | self.ano_idx 
= np.where(y == 1)[0] 52 | 53 | self.x = torch.tensor(x, dtype=torch.float32).to(device) 54 | self.y = torch.tensor(y, dtype=torch.int64).to(device) 55 | 56 | W_lst = [] 57 | if self.verbose: 58 | iterator = range(len(self.ano_idx)) 59 | else: 60 | iterator = tqdm(range(len(self.ano_idx))) 61 | for ii in iterator: 62 | idx = self.ano_idx[ii] 63 | 64 | s_t = time.time() 65 | W = self.interpret_ano(idx) 66 | W_lst.append(W) 67 | if self.verbose: 68 | print("Ano_id:[{}], ({}/{}) \t time: {:.2f}s\n".format( 69 | idx, (ii + 1), len(self.ano_idx), (time.time() - s_t))) 70 | 71 | fea_weight_lst = [] 72 | for ii, idx in enumerate(self.ano_idx): 73 | w = W_lst[ii] 74 | fea_weight = np.zeros(self.dim) 75 | for j in range(len(w)): 76 | fea_weight += abs(w[j]) 77 | fea_weight_lst.append(fea_weight) 78 | return fea_weight_lst 79 | 80 | def interpret_ano(self, idx): 81 | device = self.device 82 | dim = self.dim 83 | 84 | data_loader, test_loader = self.prepare_triplets(idx) 85 | n_linear = self.n_linear 86 | model = ATONablanet(n_feature=dim, n_linear=n_linear) 87 | model.to(device) 88 | 89 | optimizer = optim.Adam(model.parameters(), lr=self.lr, weight_decay=1e-2) 90 | criterion_tml = torch.nn.TripletMarginLoss(margin=self.margin, p=2) 91 | 92 | scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1) 93 | early_stp = EarlyStopping(patience=3, verbose=False) 94 | 95 | for epoch in range(self.n_epoch): 96 | model.train() 97 | total_loss = 0 98 | es_time = time.time() 99 | 100 | batch_cnt = 0 101 | for anchor, pos, neg in data_loader: 102 | anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device) 103 | embed_anchor, embed_pos, embed_neg = model(anchor, pos, neg) 104 | loss = criterion_tml(embed_anchor, embed_pos, embed_neg) 105 | 106 | total_loss += loss.item()  # accumulate a plain float rather than the autograd graph 107 | 108 | optimizer.zero_grad() 109 | loss.backward() 110 | optimizer.step() 111 | batch_cnt += 1 112 | 113 | train_loss = total_loss / batch_cnt 114 | est = time.time() - es_time 115 | if (epoch + 1) % 1 == 0 and self.verbose: 116 | message = 'Epoch: [{:02}/{:02}] loss: {:.4f} Time: {:.2f}s'.format( 117 | epoch + 1, self.n_epoch, 118 | train_loss, est) 119 | print(message) 120 | scheduler.step() 121 | 122 | early_stp(train_loss, model) 123 | if early_stp.early_stop: 124 | model.load_state_dict(torch.load(early_stp.path)) 125 | if self.verbose: 126 | print("early stopping") 127 | break 128 | 129 | W = model.linear.weight.data.cpu().numpy() 130 | return W 131 | 132 | def prepare_triplets(self, idx): 133 | x = self.x 134 | y = self.y 135 | selector = MyHardSingleTripletSelector(nbrs_num=self.nbrs_num, rand_num=self.rand_num) 136 | dataset = SingleTripletDataset(idx, x, y, triplets_selector=selector) 137 | data_loader = Data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True) 138 | test_loader = Data.DataLoader(dataset, batch_size=len(dataset)) 139 | return data_loader, test_loader 140 | 141 | -------------------------------------------------------------------------------- /main2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script runs the outlier interpretation methods SiNNE and Anchor. 3 | These methods directly use a feature subspace as the interpretation. 4 | 5 | @ Author: Hongzuo Xu 6 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 7 | """ 8 | 9 | import os 10 | import ast 11 | import time, datetime 12 | import argparse 13 | import pandas as pd 14 | import numpy as np 15 | from prettytable import PrettyTable 16 | from model_sinne.SiNNE import 
SiNNE 17 | from model_iml.Anchor import Anchor 18 | from config import root 19 | from eval.evaluation_od import evaluation_od 20 | from utils.eval_print_utils import print_eval_runs2 21 | 22 | 23 | # ------------------- parameters ----------------- # 24 | algorithm_name = "anchor" 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--path', type=str, default="data/") 28 | parser.add_argument('--runs', type=int, default=1) 29 | parser.add_argument('--eval', type=ast.literal_eval, default=True, help='') 30 | if algorithm_name == "sinne": 31 | parser.add_argument('--max_level', default='full', help='') 32 | parser.add_argument("--width", type=int, default=10, help='') 33 | parser.add_argument("--ensemble_num", type=int, default=100, help='') 34 | parser.add_argument("--sample_num", type=int, default=8, help='') 35 | parser.add_argument("--pretrain", type=bool, default=False, help='') 36 | parser.add_argument("--verbose", type=bool, default=False, help='') 37 | elif algorithm_name == 'anchor': 38 | parser.add_argument('--kernel', default='rbf', help='') 39 | else: 40 | raise NotImplementedError("not supported algorithm") 41 | args = parser.parse_args() 42 | 43 | input_root_list = [root + args.path] 44 | od_eval_model = ["iforest", "copod", "hbos"] 45 | runs = args.runs 46 | record_name = "" 47 | 48 | # ------------------- record ----------------- # 49 | if not os.path.exists("record/" + algorithm_name): 50 | os.makedirs("record/" + algorithm_name) 51 | record_path = "record/" + algorithm_name + "/zout." + \ 52 | algorithm_name + "." + record_name + ".txt" 53 | doc = open(record_path, 'a') 54 | tab1 = PrettyTable(["parameter", "value"]) 55 | tab1.add_row(["@ data", str(input_root_list)]) 56 | tab1.add_row(["@ algorithm_name", str(algorithm_name)]) 57 | tab1.add_row(["@ runs", str(runs)]) 58 | tab1.add_row(["@ od_eval_model", str(od_eval_model)]) 59 | tab1.add_row(["@ start_time", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]) 60 | for k in list(vars(args).keys()): 61 | tab1.add_row([k, vars(args)[k]]) 62 | print(tab1, file=doc) 63 | print(tab1) 64 | doc.close() 65 | time.sleep(0.2) 66 | 67 | 68 | def main(path, run_times): 69 | data_name = path.split("/")[-1].split(".")[0] 70 | 71 | # strip the index-number prefix of the dataset name (e.g., "01-" in "01-vertebral.csv") so that it matches the annotation files. 
72 | data_name = data_name[3:] 73 | 74 | print("# ------------------ %s ------------------ # " % data_name) 75 | 76 | df = pd.read_csv(path) 77 | X = df.values[:, :-1] 78 | y = np.array(df.values[:, -1], dtype=int) 79 | 80 | runs_metric_lst = [[] for k in range(len(od_eval_model))] 81 | for i in range(run_times): 82 | print("runs: %d" % (i + 1)) 83 | time1 = time.time() 84 | 85 | if algorithm_name == "sinne": 86 | model = SiNNE(max_level=args.max_level, width=args.width, ensemble_num=args.ensemble_num, 87 | sample_num=args.sample_num, pretrain=args.pretrain) 88 | exp_subspace_list = model.fit(X, y) 89 | elif algorithm_name == 'anchor': 90 | model = Anchor() 91 | exp_subspace_list = model.fit(X, y) 92 | else: 93 | raise NotImplementedError("not implemented the algorithm") 94 | t = time.time() - time1 95 | 96 | if args.eval: 97 | # ---------------------- evaluation -------------------------- # 98 | for mm, eval_model in enumerate(od_eval_model): 99 | precision, recall, jaccard = evaluation_od(exp_subspace_list, X, y, data_name, model_name=eval_model) 100 | metric_lst = [precision, recall, jaccard, t] 101 | runs_metric_lst[mm].append(metric_lst) 102 | print("{}, eval_model: {}, {}".format(data_name, eval_model, metric_lst)) 103 | 104 | if args.eval: 105 | for mm in range(len(od_eval_model)): 106 | name = path.split("/")[-1].split(".")[0] 107 | txt = print_eval_runs2(runs_metric_lst[mm], data_name=name, algo_name=algorithm_name) 108 | print(txt) 109 | doc = open(record_path, 'a') 110 | print(txt, file=doc) 111 | doc.close() 112 | else: 113 | txt = data_name + "," + str(round(t, 2)) + "," + algorithm_name 114 | print(txt) 115 | doc = open(record_path, 'a') 116 | print(txt, file=doc) 117 | doc.close() 118 | return 119 | 120 | 121 | if __name__ == '__main__': 122 | for input_root in input_root_list: 123 | if os.path.isdir(input_root): 124 | for file_name in sorted(os.listdir(input_root)): 125 | if file_name.endswith(".csv"): 126 | input_path = str(os.path.join(input_root, file_name)) 127 | main(input_path, runs) 128 | 129 | else: 130 | input_path = input_root 131 | main(input_path, runs) 132 | -------------------------------------------------------------------------------- /data_od_evaluation/pima_gt_iforest.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[0, 1, 3, 6, 7]" 3 | 2,"[0, 1, 3, 5]" 4 | 4,"[2, 6]" 5 | 6,"[1, 2]" 6 | 8,"[0, 1, 3, 4, 7]" 7 | 9,"[0, 5]" 8 | 11,"[0, 1, 3, 5]" 9 | 13,[4] 10 | 14,"[3, 4, 5, 7]" 11 | 15,"[0, 1, 2]" 12 | 16,"[0, 1, 2, 3, 4, 5]" 13 | 17,"[0, 3]" 14 | 19,[7] 15 | 22,"[1, 2, 3]" 16 | 23,"[0, 7]" 17 | 24,"[0, 1, 2]" 18 | 25,"[0, 4]" 19 | 26,"[0, 3, 5]" 20 | 31,[4] 21 | 37,"[0, 1, 3, 6, 7]" 22 | 38,"[1, 3]" 23 | 39,"[3, 6, 7]" 24 | 43,"[1, 2, 3, 4, 5]" 25 | 45,"[0, 1, 6]" 26 | 48,"[0, 2, 5, 7]" 27 | 53,"[0, 1, 2, 4, 7]" 28 | 56,"[0, 1, 2, 4]" 29 | 61,"[0, 1, 3]" 30 | 64,"[0, 3]" 31 | 66,"[0, 6, 7]" 32 | 70,"[4, 6]" 33 | 72,"[0, 3]" 34 | 78,"[2, 5]" 35 | 84,"[2, 3]" 36 | 88,[0] 37 | 93,"[0, 7]" 38 | 99,"[0, 1, 2, 3, 4, 5]" 39 | 100,"[1, 3, 6]" 40 | 109,"[0, 4]" 41 | 110,"[1, 6, 7]" 42 | 111,"[0, 2, 4]" 43 | 114,"[0, 1, 2, 4]" 44 | 115,"[1, 2, 7]" 45 | 116,"[3, 7]" 46 | 120,"[3, 5]" 47 | 124,"[3, 7]" 48 | 125,"[2, 5]" 49 | 128,"[0, 2, 4, 7]" 50 | 129,"[0, 7]" 51 | 130,"[1, 3]" 52 | 131,"[0, 2, 3, 6, 7]" 53 | 132,"[1, 4]" 54 | 143,"[0, 3]" 55 | 152,"[0, 1, 2, 6, 7]" 56 | 154,"[0, 1, 3, 5]" 57 | 155,"[0, 3, 5]" 58 | 159,[0] 59 | 164,"[0, 1, 2, 3, 6]" 60 | 165,"[0, 4]" 61 | 170,"[0, 1, 3]" 62 | 
171,"[0, 4]" 63 | 175,"[0, 1, 3]" 64 | 177,[5] 65 | 179,"[0, 6]" 66 | 185,"[0, 1]" 67 | 186,"[0, 1, 2, 4]" 68 | 187,"[2, 6]" 69 | 188,"[0, 3, 5, 7]" 70 | 189,"[0, 4]" 71 | 192,"[0, 1, 3]" 72 | 193,"[0, 1, 2, 5]" 73 | 195,"[0, 1, 2, 3, 4]" 74 | 197,"[3, 4]" 75 | 198,"[0, 3]" 76 | 199,[4] 77 | 206,"[0, 1, 2, 4, 5, 7]" 78 | 207,"[2, 3]" 79 | 209,"[0, 1]" 80 | 213,"[3, 5]" 81 | 214,"[0, 4]" 82 | 215,"[0, 4]" 83 | 216,"[0, 2, 3, 4, 7]" 84 | 218,"[0, 6]" 85 | 219,"[3, 5, 7]" 86 | 220,"[0, 1, 2, 4]" 87 | 221,"[0, 6, 7]" 88 | 227,"[1, 2, 3]" 89 | 230,"[0, 1, 3, 5, 7]" 90 | 231,"[0, 4, 5]" 91 | 235,"[1, 3, 5, 7]" 92 | 236,"[1, 7]" 93 | 237,"[1, 2, 3, 5, 7]" 94 | 238,"[0, 3]" 95 | 242,"[2, 3]" 96 | 243,"[0, 2, 6]" 97 | 245,"[0, 1, 2, 6, 7]" 98 | 254,"[0, 2, 3, 4]" 99 | 255,"[3, 7]" 100 | 259,"[0, 6, 7]" 101 | 261,"[2, 6]" 102 | 264,[7] 103 | 266,"[2, 6]" 104 | 269,"[1, 2]" 105 | 270,"[0, 1, 2, 5, 6]" 106 | 276,"[0, 2]" 107 | 280,"[0, 1, 3]" 108 | 283,"[0, 1, 3, 6, 7]" 109 | 284,"[0, 7]" 110 | 287,"[4, 5]" 111 | 291,"[0, 4]" 112 | 292,"[5, 6, 7]" 113 | 293,"[2, 3]" 114 | 296,[4] 115 | 298,[0] 116 | 300,"[0, 1, 2]" 117 | 301,"[1, 2]" 118 | 303,"[3, 5]" 119 | 306,"[0, 1, 3, 5, 7]" 120 | 308,"[0, 6]" 121 | 309,"[4, 6]" 122 | 312,"[0, 1, 3]" 123 | 314,"[0, 6]" 124 | 317,"[1, 3]" 125 | 319,"[1, 3, 5, 7]" 126 | 321,"[3, 6, 7]" 127 | 322,"[0, 7]" 128 | 323,"[0, 5]" 129 | 326,[4] 130 | 328,"[0, 5]" 131 | 332,"[1, 2]" 132 | 337,[7] 133 | 338,"[0, 4]" 134 | 339,"[0, 1, 3, 5]" 135 | 349,[1] 136 | 355,"[0, 1, 2, 3, 5, 7]" 137 | 356,"[2, 3]" 138 | 357,"[0, 2]" 139 | 359,"[0, 1]" 140 | 360,"[0, 1, 2, 4]" 141 | 363,"[0, 7]" 142 | 366,"[0, 3, 6, 7]" 143 | 369,[2] 144 | 370,"[0, 3, 4, 6, 7]" 145 | 375,"[0, 4]" 146 | 378,"[3, 5]" 147 | 386,"[0, 6]" 148 | 387,[2] 149 | 388,"[4, 7]" 150 | 391,"[1, 3, 5, 7]" 151 | 394,"[1, 3, 6]" 152 | 397,"[3, 7]" 153 | 399,[1] 154 | 400,"[0, 1, 3]" 155 | 402,"[0, 3]" 156 | 404,"[1, 2, 3]" 157 | 406,"[0, 7]" 158 | 408,"[1, 3, 6]" 159 | 409,"[2, 3, 4]" 160 | 414,"[0, 4]" 161 | 415,"[0, 1, 2, 4]" 162 | 417,"[0, 1, 5]" 163 | 419,"[4, 5]" 164 | 424,"[0, 4, 5]" 165 | 425,"[0, 1, 2, 4, 7]" 166 | 427,"[0, 1, 7]" 167 | 429,"[0, 1, 2, 4, 7]" 168 | 435,"[2, 5]" 169 | 440,"[0, 1, 2]" 170 | 443,"[0, 3, 6, 7]" 171 | 444,"[3, 4]" 172 | 445,"[3, 5]" 173 | 448,"[0, 4]" 174 | 451,"[3, 7]" 175 | 455,"[0, 2]" 176 | 458,"[0, 3]" 177 | 468,"[0, 1, 2]" 178 | 476,"[0, 3]" 179 | 480,[4] 180 | 484,"[2, 5]" 181 | 485,"[0, 4]" 182 | 493,"[0, 6, 7]" 183 | 498,"[0, 1, 3, 5, 7]" 184 | 502,[1] 185 | 506,"[0, 1, 7]" 186 | 510,"[0, 1]" 187 | 515,"[1, 3]" 188 | 516,"[0, 1, 2, 6, 7]" 189 | 523,"[0, 3]" 190 | 535,"[0, 1, 2]" 191 | 539,"[2, 3]" 192 | 540,"[0, 1, 4]" 193 | 541,"[0, 4]" 194 | 542,"[0, 1, 3, 6, 7]" 195 | 545,"[0, 1, 2, 4]" 196 | 546,"[1, 5, 7]" 197 | 560,"[6, 7]" 198 | 561,"[0, 1, 5]" 199 | 569,"[0, 4, 7]" 200 | 577,"[3, 5, 7]" 201 | 579,[3] 202 | 580,"[0, 1, 2, 3, 7]" 203 | 584,"[0, 4, 5, 7]" 204 | 586,"[0, 1, 3]" 205 | 588,"[0, 1, 6, 7]" 206 | 590,"[0, 5]" 207 | 592,"[0, 7]" 208 | 595,"[0, 1]" 209 | 598,"[0, 1, 3, 6, 7]" 210 | 603,"[4, 7]" 211 | 604,"[0, 1, 2]" 212 | 606,"[4, 6]" 213 | 611,"[1, 2]" 214 | 612,"[0, 1, 2, 4]" 215 | 614,"[0, 3]" 216 | 618,"[0, 6, 7]" 217 | 619,"[0, 1, 2]" 218 | 630,"[0, 3, 6, 7]" 219 | 635,"[0, 3]" 220 | 638,"[0, 5, 6, 7]" 221 | 642,"[3, 7]" 222 | 646,"[1, 5]" 223 | 647,"[0, 1, 2]" 224 | 648,"[0, 4]" 225 | 655,"[2, 4]" 226 | 659,"[1, 2, 6]" 227 | 661,"[1, 6]" 228 | 662,"[2, 3]" 229 | 663,"[0, 3]" 230 | 664,"[0, 2, 3, 7]" 231 | 666,[7] 232 | 667,"[0, 3]" 233 | 675,"[1, 3]" 234 | 
676,"[0, 1, 3, 6, 7]" 235 | 678,"[2, 3]" 236 | 681,"[1, 5]" 237 | 683,"[0, 2, 3, 6, 7]" 238 | 689,"[3, 4, 5, 7]" 239 | 691,"[0, 1, 2]" 240 | 693,"[0, 3]" 241 | 695,"[0, 2, 4]" 242 | 696,"[1, 3]" 243 | 701,[7] 244 | 702,"[0, 6, 7]" 245 | 706,"[0, 5]" 246 | 708,"[0, 1, 3]" 247 | 709,"[1, 4]" 248 | 712,"[0, 5]" 249 | 715,"[0, 1, 2, 4]" 250 | 716,"[1, 6]" 251 | 719,"[1, 7]" 252 | 722,"[0, 1, 7]" 253 | 730,[7] 254 | 731,"[0, 7]" 255 | 732,"[0, 1, 5]" 256 | 739,"[0, 1, 3, 5, 7]" 257 | 740,"[0, 3, 5, 7]" 258 | 743,"[0, 2, 3, 6, 7]" 259 | 746,[5] 260 | 748,"[0, 1, 4]" 261 | 749,"[0, 1, 2, 6, 7]" 262 | 750,"[0, 6, 7]" 263 | 753,"[0, 1, 2, 4]" 264 | 754,"[0, 1]" 265 | 755,"[0, 6, 7]" 266 | 757,"[0, 7]" 267 | 759,"[1, 2, 7]" 268 | 761,"[0, 1, 5]" 269 | 766,"[0, 2, 7]" 270 | -------------------------------------------------------------------------------- /data_od_evaluation/pima_gt_copod.csv: -------------------------------------------------------------------------------- 1 | ano_idx,exp_subspace 2 | 0,"[0, 1, 6, 7]" 3 | 2,"[0, 1, 2, 5]" 4 | 4,"[2, 6]" 5 | 6,[2] 6 | 8,"[1, 3, 4, 5, 7]" 7 | 9,[5] 8 | 11,"[0, 1, 5]" 9 | 13,[4] 10 | 14,"[0, 1, 4, 5, 7]" 11 | 15,[2] 12 | 16,"[0, 2, 3, 4, 5]" 13 | 17,"[0, 5]" 14 | 19,"[3, 4, 5, 7]" 15 | 22,"[1, 2]" 16 | 23,"[0, 2, 3, 5]" 17 | 24,"[0, 2, 3, 4, 7]" 18 | 25,"[0, 4]" 19 | 26,"[0, 1, 5, 7]" 20 | 31,"[3, 4]" 21 | 37,"[0, 2, 3, 7]" 22 | 38,"[2, 3]" 23 | 39,"[0, 3, 4, 6, 7]" 24 | 43,"[0, 2, 4, 5]" 25 | 45,"[0, 1, 2, 6]" 26 | 48,"[0, 2, 3, 5]" 27 | 53,"[0, 2, 3, 4, 7]" 28 | 56,"[0, 1, 2, 3, 4, 5]" 29 | 61,"[0, 1, 7]" 30 | 64,"[0, 2, 7]" 31 | 66,"[0, 2, 6, 7]" 32 | 70,"[2, 6]" 33 | 72,"[0, 2]" 34 | 78,[2] 35 | 84,[2] 36 | 88,"[0, 2]" 37 | 93,"[1, 5, 7]" 38 | 99,"[2, 3, 4, 5]" 39 | 100,"[1, 6, 7]" 40 | 109,"[0, 2, 4, 5]" 41 | 110,"[1, 3, 4]" 42 | 111,"[0, 2, 4]" 43 | 114,[2] 44 | 115,"[1, 2, 7]" 45 | 116,"[0, 6, 7]" 46 | 120,"[3, 5]" 47 | 124,"[0, 2, 7]" 48 | 125,[2] 49 | 128,"[2, 3, 4, 7]" 50 | 129,"[6, 7]" 51 | 130,"[0, 1, 4, 5]" 52 | 131,"[0, 2, 6]" 53 | 132,"[1, 2, 3, 4]" 54 | 143,"[0, 2, 7]" 55 | 152,"[0, 1, 2, 6, 7]" 56 | 154,"[0, 1, 5, 7]" 57 | 155,"[0, 2, 3, 5]" 58 | 159,[0] 59 | 164,"[0, 2, 6]" 60 | 165,"[0, 4, 5, 6, 7]" 61 | 170,"[0, 2, 6, 7]" 62 | 171,"[0, 4]" 63 | 175,"[0, 1, 3, 4]" 64 | 177,"[0, 2, 3, 4, 5]" 65 | 179,"[6, 7]" 66 | 185,"[0, 1, 2, 6, 7]" 67 | 186,"[0, 4, 5, 7]" 68 | 187,"[2, 6]" 69 | 188,"[0, 2, 3, 4, 5]" 70 | 189,"[0, 2, 3, 4]" 71 | 192,"[0, 1, 2, 5]" 72 | 193,[2] 73 | 195,"[0, 2, 3, 4, 5]" 74 | 197,[5] 75 | 198,"[2, 3]" 76 | 199,"[0, 2, 4]" 77 | 206,"[1, 7]" 78 | 207,"[1, 2, 7]" 79 | 209,"[0, 1, 2, 7]" 80 | 213,"[0, 2, 4, 5]" 81 | 214,"[0, 2, 3, 4]" 82 | 215,"[0, 2, 3, 4, 5]" 83 | 216,"[0, 2, 3, 4]" 84 | 218,[6] 85 | 219,"[0, 2, 7]" 86 | 220,"[0, 1, 2, 4]" 87 | 221,"[6, 7]" 88 | 227,[2] 89 | 230,"[1, 2, 5]" 90 | 231,"[3, 4, 5, 7]" 91 | 235,"[1, 5]" 92 | 236,"[0, 1, 2, 7]" 93 | 237,"[1, 2, 5]" 94 | 238,"[0, 1, 2, 6]" 95 | 242,[2] 96 | 243,"[0, 2, 6]" 97 | 245,"[0, 1, 2, 6, 7]" 98 | 254,"[0, 2, 4, 5]" 99 | 255,"[2, 3]" 100 | 259,"[0, 6, 7]" 101 | 261,[2] 102 | 264,[2] 103 | 266,[2] 104 | 269,[2] 105 | 270,"[0, 2, 5, 6]" 106 | 276,[2] 107 | 280,"[0, 1, 2, 5]" 108 | 283,"[0, 1, 2, 7]" 109 | 284,"[5, 7]" 110 | 287,"[2, 3, 4, 5]" 111 | 291,"[0, 2, 6]" 112 | 292,"[2, 3, 4, 6, 7]" 113 | 293,[2] 114 | 296,"[3, 4, 5]" 115 | 298,"[0, 4]" 116 | 300,[2] 117 | 301,[2] 118 | 303,"[2, 5]" 119 | 306,"[0, 1, 5, 7]" 120 | 308,[6] 121 | 309,"[2, 4, 6, 7]" 122 | 312,"[1, 5]" 123 | 314,"[6, 7]" 124 | 317,[1] 125 | 319,"[1, 5, 7]" 126 | 321,[3] 127 | 322,[5] 
128 | 323,"[0, 1, 2, 5]" 129 | 326,"[2, 3, 4]" 130 | 328,"[2, 3, 4, 5]" 131 | 332,[2] 132 | 337,"[0, 2, 7]" 133 | 338,"[0, 2, 3, 4, 6, 7]" 134 | 339,"[0, 1, 2, 5]" 135 | 349,"[0, 1, 2, 5]" 136 | 355,"[0, 1, 2, 7]" 137 | 356,[2] 138 | 357,"[0, 2]" 139 | 359,"[1, 3, 4, 5]" 140 | 360,"[0, 1, 2, 4]" 141 | 363,"[1, 7]" 142 | 366,"[0, 5]" 143 | 369,[2] 144 | 370,"[2, 3, 4, 6]" 145 | 375,"[0, 3, 4, 7]" 146 | 378,"[1, 5]" 147 | 386,"[0, 6, 7]" 148 | 387,"[0, 2, 3, 7]" 149 | 388,"[0, 4, 7]" 150 | 391,"[1, 5]" 151 | 394,"[0, 1, 6]" 152 | 397,"[2, 3]" 153 | 399,"[1, 2]" 154 | 400,[2] 155 | 402,"[0, 2, 3, 4]" 156 | 404,"[0, 1, 2, 6, 7]" 157 | 406,"[0, 5, 7]" 158 | 408,"[0, 1, 5, 6, 7]" 159 | 409,"[3, 4]" 160 | 414,"[0, 2, 3, 4]" 161 | 415,"[1, 2, 3, 4]" 162 | 417,"[0, 1, 5, 6, 7]" 163 | 419,"[2, 4, 5]" 164 | 424,"[0, 4, 5]" 165 | 425,"[0, 1, 3, 4, 5]" 166 | 427,"[1, 2]" 167 | 429,"[3, 4, 5, 7]" 168 | 435,[2] 169 | 440,"[1, 2]" 170 | 443,"[0, 2, 6]" 171 | 444,[2] 172 | 445,"[3, 5]" 173 | 448,"[0, 2, 3, 4]" 174 | 451,"[1, 5, 6, 7]" 175 | 455,"[0, 1, 2]" 176 | 458,"[0, 2, 3, 4, 6, 7]" 177 | 468,[2] 178 | 476,"[2, 3, 4]" 179 | 480,[4] 180 | 484,[2] 181 | 485,"[0, 2, 3, 4, 5]" 182 | 493,"[6, 7]" 183 | 498,"[0, 1, 5, 7]" 184 | 502,"[0, 1, 2, 3, 5]" 185 | 506,"[1, 2]" 186 | 510,"[0, 7]" 187 | 515,"[1, 2]" 188 | 516,"[0, 2, 3, 4, 7]" 189 | 523,"[0, 6, 7]" 190 | 535,[2] 191 | 539,"[2, 3]" 192 | 540,"[0, 3, 4, 5, 7]" 193 | 541,[4] 194 | 542,"[0, 2, 6, 7]" 195 | 545,"[0, 1, 2, 3, 4]" 196 | 546,"[0, 1, 5, 6, 7]" 197 | 560,"[6, 7]" 198 | 561,"[1, 2]" 199 | 569,"[0, 2, 3, 4]" 200 | 577,"[6, 7]" 201 | 579,[3] 202 | 580,"[2, 3]" 203 | 584,"[0, 4, 5, 7]" 204 | 586,"[0, 1, 2, 6, 7]" 205 | 588,"[1, 2, 6, 7]" 206 | 590,"[0, 5, 6, 7]" 207 | 592,"[1, 7]" 208 | 595,"[1, 2]" 209 | 598,"[1, 6, 7]" 210 | 603,"[0, 1, 6, 7]" 211 | 604,[2] 212 | 606,"[1, 2, 3, 4, 5]" 213 | 611,"[1, 2]" 214 | 612,"[0, 2, 3, 4]" 215 | 614,"[0, 4, 7]" 216 | 618,"[0, 6, 7]" 217 | 619,[2] 218 | 630,"[0, 2, 5, 6]" 219 | 635,"[0, 2]" 220 | 638,"[0, 5, 6]" 221 | 642,"[0, 1, 5, 7]" 222 | 646,"[1, 5]" 223 | 647,[2] 224 | 648,"[0, 2, 3, 4, 5]" 225 | 655,"[2, 4, 5]" 226 | 659,[6] 227 | 661,[1] 228 | 662,"[0, 2, 3, 4]" 229 | 663,"[0, 2, 3, 4]" 230 | 664,"[0, 2, 3, 7]" 231 | 666,[7] 232 | 667,"[0, 5, 6, 7]" 233 | 675,"[1, 2]" 234 | 676,"[0, 1, 5, 7]" 235 | 678,[2] 236 | 681,"[1, 5]" 237 | 683,"[0, 2, 6]" 238 | 689,"[3, 4, 5, 7]" 239 | 691,"[0, 2]" 240 | 693,"[0, 2, 3, 4]" 241 | 695,"[0, 2, 4]" 242 | 696,"[1, 5]" 243 | 701,"[0, 3, 5, 7]" 244 | 702,"[1, 2, 6, 7]" 245 | 706,[5] 246 | 708,"[0, 1, 2, 7]" 247 | 709,"[2, 3, 4, 5]" 248 | 712,"[0, 2, 3, 5]" 249 | 715,"[0, 1, 2, 4]" 250 | 716,"[1, 2, 3, 4]" 251 | 719,[7] 252 | 722,"[3, 4, 5, 7]" 253 | 730,"[3, 4, 5, 7]" 254 | 731,"[0, 1, 2, 5]" 255 | 732,"[1, 2, 3, 4, 5]" 256 | 739,"[1, 5, 7]" 257 | 740,"[0, 2, 3, 4, 5, 7]" 258 | 743,"[0, 1, 2, 6, 7]" 259 | 746,"[2, 3, 5]" 260 | 748,"[1, 4, 5]" 261 | 749,"[0, 1, 2, 5, 7]" 262 | 750,[6] 263 | 753,"[0, 1, 2, 3, 4, 5]" 264 | 754,"[0, 1, 2, 7]" 265 | 755,"[2, 3, 4, 6, 7]" 266 | 757,[7] 267 | 759,"[1, 2, 7]" 268 | 761,"[0, 1, 5, 7]" 269 | 766,[2] 270 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Outlier Interpretation 2 | 3 | This repository contains the source code for the paper **Beyond Outlier Detection: Interpreting Outliers by Attention-Guided Triplet Deviation Network** published in the Web Conference (WWW'21). 
4 | 
5 | Note that this task is also referred to as outlier explanation, outlier aspect mining/discovering, outlier property detection, and outlier description. 
6 | 
7 | 
8 | 
9 | ### Seven Outlier Interpretation Methods 
10 | 
11 | **This repository contains seven outlier interpretation methods: ATON [1], COIN [2], SiNNE [3], SHAP [4], LIME [5], Integrated Gradients [6], and Anchor [7].** 
12 | 
13 | [1] Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network. In WWW. 2021. 
14 | 
15 | [2] Contextual outlier interpretation. In IJCAI. 2018. 
16 | 
17 | [3] A new effective and efficient measure for outlying aspect mining. arXiv preprint arXiv:2004.13550. 2020. 
18 | 
19 | [4] A unified approach to interpreting model predictions. In NeurIPS. 2017. 
20 | 
21 | [5] "Why should I trust you?" Explaining the predictions of any classifier. In SIGKDD. 2016. 
22 | 
23 | [6] Axiomatic attribution for deep networks. In ICML. 2017. 
24 | 
25 | [7] Anchors: High-Precision Model-Agnostic Explanations. In AAAI. 2018. 
26 | 
27 | 
28 | 
29 | ### Structure 
30 | `data_od_evaluation`: ground-truth outlier interpretation annotations of real-world datasets 
31 | `data`: real-world datasets in csv format; the last column is a label indicating whether each line is an outlier or an inlier 
32 | `model_xx`: folders of ATON and its competitors (the competitors are introduced in Section 5.1.2) 
33 | `config.py`: configuration and default hyper-parameters 
34 | `main.py`: main script to run the experiments 
35 | 
36 | 
37 | 
38 | ### How to use? 
39 | ##### 1. For ATON and competitors COIN, SHAP, LIME, and IntGrad 
40 | 1. modify the variable `algorithm_name` in `main.py` (supported algorithms: `aton`, `coin`, `shap`, `lime`, in lowercase) 
41 | 2. run `python main.py --path data/ --runs 10` 
42 | 3. the results can be found in the `record/[algorithm_name]/` folder 
43 | 
44 | ##### 2. For ATON' and competitor COIN' 
45 | 1. modify the variable `algorithm_name` in `main.py` to `aton` or `coin` 
46 | 2. run `python main.py --path data/ --w2s_ratio auto --runs 10` for ATON' 
47 | run `python main.py --path data/ --w2s_ratio pn --runs 10` for COIN' 
48 | 
49 | ##### 3. For competitors SiNNE and Anchor 
50 | 1. modify the variable `algorithm_name` in `main2.py` to `sinne` or `anchor` 
51 | 2. run `python main2.py --path data/ --runs 10` 
52 | 
53 | 
54 | 
55 | ### args of main.py 
56 | - `--path [str]` - the path of a data folder or an individual data file (in csv format) 
57 | 
58 | - `--gpu [True/False]` - use GPU or not 
59 | 
60 | - `--runs [int]` - how many times to run a method on each dataset (we run 10 times and report the average performance in our submission) 
61 | 
62 | - `--w2s_ratio [auto/real_len/pn]` - how to transfer feature weights to a feature subspace: 'real_len' uses the same length as the ground-truth subspace, 
63 | 'auto' generates the subspace by the proposed threshold, and 'pn' keeps the features with positive weights. 
64 | (In our paper, we use 'pn' in COIN' and 'auto' in ATON'; for methods that output feature weights, we directly use 'real_len'. See the sketch after this list.) 
65 | 
66 | - `--eval [True/False]` - evaluate the interpretation results or not; use False for scalability tests 
67 | ... (other hyper-parameters of different methods; you may want to use `-h` to check the corresponding hyper-parameters after modifying the `algorithm_name`)
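**Sketch: converting feature weights to a subspace.** The 'auto' option keeps the highest-weight features until a fraction r = sqrt(2 / dim) of the total weight is covered (this mirrors `weight2subspace` in `model_aton/ATON_ablation3.py` and `model_utils.get_exp_subspace`). Below is a minimal illustrative sketch of that thresholding; `weight_to_subspace` is a made-up name for illustration, not part of this repository's API: 

``` 
import math 
import numpy as np 

def weight_to_subspace(weight, r): 
    # keep the highest-weight features until their cumulative 
    # weight reaches the fraction r of the total weight 
    threshold = r * np.sum(weight) 
    cum, subspace = 0.0, [] 
    for idx in np.argsort(weight)[::-1]:   # features sorted by descending weight 
        cum += weight[idx] 
        subspace.append(int(idx)) 
        if cum >= threshold: 
            break 
    return sorted(subspace) 

weight = np.array([0.05, 0.40, 0.10, 0.35, 0.10]) 
r = math.sqrt(2 / len(weight))           # the 'auto' ratio 
print(weight_to_subspace(weight, r))     # -> [1, 3] 
```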
68 | 
69 | 
70 | 
71 | ### Requirements 
72 | Main packages of this project: 
73 | ``` 
74 | torch==1.3.0 
75 | numpy==1.15.0 
76 | pandas==0.25.2 
77 | scikit-learn==0.23.1 
78 | pyod==0.8.2 
79 | tqdm==4.48.2 
80 | prettytable==0.7.2 
81 | shap==0.35.0 
82 | lime==0.2.0.1 
83 | alibi==0.5.5 
84 | ``` 
85 | 
86 | 
87 | 
88 | ### Ground-truth annotations 
89 | 
90 | Please also find the ground-truth outlier interpretation annotations in the folder `data_od_evaluation`. 
91 | *We expect these annotations to foster further research on this new practical problem.* 
92 | 
93 | Each dataset has three annotation files. Please refer to the detailed annotation generation process in our submission; we introduce it in detail in Section 5.1.4: 
94 | 
95 | **How to generate the ground-truth annotations:** 
96 | > We employ three different kinds of representative outlier detection methods (i.e., the ensemble-based method iForest, the probability-based method COPOD, and the distance-based method HBOS) to evaluate the outlying degree of real outliers given every possible subspace. A good explanation for an outlier should be a high-contrast subspace in which the outlier explicitly demonstrates its outlierness, and outlier detectors can easily and certainly predict it as an outlier in this subspace. Therefore, the ground-truth interpretation for each outlier is defined as the subspace in which the outlier obtains the highest outlier score among all the possible subspaces. 
97 | 
98 | 
99 | 
100 | ### A typo in the paper 
101 | 
102 | 
103 | 
104 | On the second page, "As shown in Figure 1 (a), the queried outlier is ..., and the interpretation is feature subspace **$\{f1, f2\}$**" should be **$\{f1, f3\}$**. 
105 | 
106 | We thank @Zeyi Li (NJPU) for finding this typo. 
107 | 
108 | 
109 | 
110 | ### References 
111 | - Datasets are from ODDS, an outlier detection datasets library (http://odds.cs.stonybrook.edu/), and the Kaggle platform (https://www.kaggle.com/) 
112 | - The source code of competitor COIN is publicly available on GitHub. 
113 | 
114 | 
115 | 
116 | ### Citation 
117 | 
118 | 😄 If you find this useful in your research, please consider citing: 
119 | ``` 
120 | @inproceedings{xu2021aton, 
121 | title={Beyond Outlier Detection: Interpreting Outliers by Attention-Guided Triplet Deviation Network}, 
122 | author={Xu, Hongzuo and Wang, Yijie and Jian, Songlei and Huang, Zhenyu and Wang, Yongjun and Liu, Ning and Li, Fei}, 
123 | booktitle={Proceedings of The Web Conference 2021 (WWW’21)}, 
124 | year={2021}, 
125 | publisher={ACM} 
126 | } 
127 | ``` 
128 | 
-------------------------------------------------------------------------------- 
/model_aton/networks.py: 
-------------------------------------------------------------------------------- 
1 | """ 
2 | This script implements an outlier interpretation method of the following paper: 
3 | "Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network". in WWW'21.
4 | @ Author: Hongzuo Xu 5 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 6 | """ 7 | 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ATONnet(nn.Module): 15 | def __init__(self, attn_net, n_feature, n_linear): 16 | super(ATONnet, self).__init__() 17 | self.attn_net = attn_net 18 | self.linear = torch.nn.Linear(n_feature, n_linear, bias=False) 19 | 20 | def forward(self, anchor, positive, negative): 21 | anchor = self.linear(anchor) 22 | positive = self.linear(positive) 23 | negative = self.linear(negative) 24 | 25 | cat = torch.cat([negative, anchor, positive], dim=1) 26 | 27 | attn = self.attn_net(cat) 28 | embedded_n = negative * attn 29 | embedded_a = anchor * attn 30 | embedded_p = positive * attn 31 | 32 | embedded_n_dff = (1 - attn) * negative 33 | embedded_a_dff = (1 - attn) * anchor 34 | embedded_p_dff = (1 - attn) * positive 35 | dis1 = F.pairwise_distance(embedded_n_dff, embedded_a_dff) 36 | dis2 = F.pairwise_distance(embedded_p_dff, embedded_a_dff) 37 | dis = torch.abs(dis1 - dis2) 38 | 39 | return embedded_a, embedded_p, embedded_n, attn, dis 40 | 41 | def get_lnr(self, x): 42 | return self.linear(x) 43 | 44 | 45 | class AttentionNet(nn.Module): 46 | def __init__(self, in_feature, n_hidden, out_feature): 47 | super(AttentionNet, self).__init__() 48 | self.hidden = torch.nn.Linear(in_feature, n_hidden) 49 | self.out = torch.nn.Linear(n_hidden, out_feature) 50 | 51 | def forward(self, x): 52 | x = torch.relu(self.hidden(x)) 53 | x = self.out(x) 54 | _min = torch.unsqueeze(torch.min(x, dim=1)[0], 0).t() 55 | _max = torch.unsqueeze(torch.max(x, dim=1)[0], 0).t() 56 | x = (x - _min) / (_max - _min) 57 | return x 58 | 59 | 60 | class MyLoss(nn.Module): 61 | """ 62 | triplet deviation-based loss 63 | """ 64 | def __init__(self, alpha1, alpha2, margin): 65 | super(MyLoss, self).__init__() 66 | self.alpha1 = alpha1 67 | self.alpha2 = alpha2 68 | self.criterion_tml = torch.nn.TripletMarginLoss(margin=margin, p=2) 69 | return 70 | 71 | def forward(self, embed_anchor, embed_pos, embed_neg, dis): 72 | loss_tml = self.criterion_tml(embed_anchor, embed_pos, embed_neg) 73 | loss_dis = torch.mean(dis) 74 | loss = self.alpha1 * loss_tml + self.alpha2 * loss_dis 75 | return loss 76 | 77 | 78 | # ---------------------- ATON - ablation -------------------------- # 79 | # without attention 80 | class ATONablanet(nn.Module): 81 | def __init__(self, n_feature, n_linear): 82 | super(ATONablanet, self).__init__() 83 | self.linear = torch.nn.Linear(n_feature, n_linear, bias=False) 84 | 85 | def forward(self, anchor, positive, negative): 86 | embedded_a = self.linear(anchor) 87 | embedded_p = self.linear(positive) 88 | embedded_n = self.linear(negative) 89 | return embedded_a, embedded_p, embedded_n 90 | 91 | def get_lnr(self, x): 92 | return self.linear(x) 93 | 94 | 95 | # ---------------------- ATON - ablation -------------------------- # 96 | # without feature embedding module 97 | class ATONabla2net(nn.Module): 98 | """ 99 | without feature embedding module 100 | """ 101 | def __init__(self, attn_net): 102 | super(ATONabla2net, self).__init__() 103 | self.attn_net = attn_net 104 | 105 | def forward(self, anchor, positive, negative): 106 | cat = torch.cat([negative, anchor, positive], dim=1) 107 | attn = self.attn_net(cat) 108 | 109 | embedded_n = negative * attn 110 | embedded_a = anchor * attn 111 | embedded_p = positive * attn 112 | 113 | embedded_n_dff = (1 - attn) * negative 114 | embedded_a_dff = (1 - attn) 
* anchor 
115 |         embedded_p_dff = (1 - attn) * positive 
116 |         dis1 = F.pairwise_distance(embedded_n_dff, embedded_a_dff) 
117 |         dis2 = F.pairwise_distance(embedded_p_dff, embedded_a_dff) 
118 |         dis = torch.abs(dis1 - dis2) 
119 | 
120 |         return embedded_a, embedded_p, embedded_n, attn, dis 
121 | 
122 | 
123 | # -------------------------- ATON - ablation3 ------------------------------ # 
124 | # test the significance of triplet deviation-based loss function 
125 | 
126 | class ATONabla3net(nn.Module): 
127 |     def __init__(self, attn_net, clf_net, n_feature, n_linear): 
128 |         super(ATONabla3net, self).__init__() 
129 |         self.attn_net = attn_net 
130 |         self.clf_net = clf_net 
131 |         self.linear = torch.nn.Linear(n_feature, n_linear, bias=False) 
132 | 
133 |     def forward(self, x): 
134 |         x = self.linear(x) 
135 |         attn = self.attn_net(x) 
136 |         x = x * attn 
137 |         x = self.clf_net(x) 
138 |         return x, attn 
139 | 
140 |     def get_lnr(self, x): 
141 |         return self.linear(x) 
142 | 
143 | 
144 | class ClassificationNet(nn.Module): 
145 |     def __init__(self, n_feature): 
146 |         super(ClassificationNet, self).__init__() 
147 |         self.linear = torch.nn.Linear(n_feature, 2) 
148 | 
149 |     def forward(self, x): 
150 |         x = self.linear(x) 
151 |         return x 
152 | 
153 | 
154 | class MyLossClf(nn.Module): 
155 |     """ 
156 |     loss function for ablation3 
157 |     """ 
158 |     def __init__(self, alpha1, alpha2, alpha3, margin): 
159 |         super(MyLossClf, self).__init__() 
160 |         self.alpha1 = alpha1 
161 |         self.alpha2 = alpha2 
162 |         self.alpha3 = alpha3 
163 |         self.criterion_tml = torch.nn.TripletMarginLoss(margin=margin, p=2) 
164 |         self.criterion_cel = torch.nn.CrossEntropyLoss() 
165 |         return 
166 | 
167 |     def forward(self, embed_anchor, embed_pos, embed_neg, clf_out, batch_y, dis): 
168 |         loss_tml = self.criterion_tml(embed_anchor, embed_pos, embed_neg) 
169 |         loss_cel = self.criterion_cel(clf_out, batch_y) 
170 |         loss_dis = torch.mean(dis) 
171 |         loss = self.alpha1 * loss_tml + self.alpha2 * loss_cel + self.alpha3 * loss_dis 
172 |         return loss 
173 | 
-------------------------------------------------------------------------------- 
/model_aton/ATON_ablation2.py: 
-------------------------------------------------------------------------------- 
1 | import numpy as np 
2 | import time, math 
3 | import torch 
4 | import torch.utils.data as Data 
5 | import torch.optim as optim 
6 | import torch.nn.functional as F 
7 | 
8 | from torch.optim import lr_scheduler 
9 | from sklearn.neighbors import NearestNeighbors 
10 | from sklearn import metrics 
11 | from tqdm import tqdm 
12 | from model_aton.utils import EarlyStopping, min_max_normalize 
13 | 
14 | from model_aton.datasets import MyHardSingleTripletSelector 
15 | from model_aton.datasets import SingleTripletDataset 
16 | from model_aton.networks import ATONabla2net, AttentionNet 
17 | from model_aton.networks import MyLoss 
18 | 
19 | 
20 | class ATONabla2: 
21 |     def __init__(self, nbrs_num=30, rand_num=30, alpha1=0.8, alpha2=0.2, 
22 |                  n_epoch=10, batch_size=64, lr=0.1, margin=2., 
23 |                  verbose=True, gpu=True): 
24 |         self.verbose = verbose 
25 | 
26 |         self.x = None 
27 |         self.y = None 
28 |         self.ano_idx = None 
29 |         self.dim = None 
30 | 
31 |         # a list of normal nbr of each anomaly 
32 |         self.normal_nbr_indices = [] 
33 | 
34 |         cuda = torch.cuda.is_available() 
35 |         self.device = torch.device("cuda" if cuda and gpu else "cpu") 
36 |         if cuda: 
37 |             torch.cuda.set_device(0) 
38 |         print("device:", self.device) 
39 | 
40 |         self.nbrs_num = nbrs_num 
41 |         self.rand_num = rand_num 
42 |         self.alpha1 = alpha1 
43 |         self.alpha2 = alpha2 
44 | 
45 |         self.n_epoch = n_epoch 
46 |
self.batch_size = batch_size 47 | self.lr = lr 48 | self.margin = margin 49 | return 50 | 51 | def fit(self, x, y): 52 | device = self.device 53 | 54 | self.dim = x.shape[1] 55 | x = min_max_normalize(x) 56 | self.ano_idx = np.where(y == 1)[0] 57 | 58 | self.x = torch.tensor(x, dtype=torch.float32).to(device) 59 | self.y = torch.tensor(y, dtype=torch.int64).to(device) 60 | self.prepare_nbrs() 61 | 62 | # train model for each anomaly 63 | attn_lst = [] 64 | if self.verbose: 65 | iterator = range(len(self.ano_idx)) 66 | else: 67 | iterator = tqdm(range(len(self.ano_idx))) 68 | for ii in iterator: 69 | idx = self.ano_idx[ii] 70 | 71 | s_t = time.time() 72 | attn = self.interpret_ano(ii) 73 | attn_lst.append(attn) 74 | 75 | if self.verbose: 76 | print("Ano_id:[{}], ({}/{}) \t time: {:.2f}s\n".format( 77 | idx, (ii + 1), len(self.ano_idx), 78 | (time.time() - s_t))) 79 | 80 | # fea_weight_lst = [] 81 | # for ii, idx in enumerate(self.ano_idx): 82 | # attn = attn_lst[ii] 83 | # fea_weight = attn 84 | # fea_weight_lst.append(fea_weight) 85 | return attn_lst 86 | 87 | def interpret_ano(self, ii): 88 | idx = self.ano_idx[ii] 89 | device = self.device 90 | dim = self.dim 91 | 92 | nbr_indices = self.normal_nbr_indices[ii] 93 | data_loader, test_loader = self.prepare_triplets(idx, nbr_indices) 94 | attn_net = AttentionNet(in_feature=3 * dim, n_hidden=int(1.5 * dim), out_feature=dim) 95 | model = ATONabla2net(attn_net=attn_net) 96 | model.to(device) 97 | 98 | optimizer = optim.Adam(model.parameters(), lr=self.lr, weight_decay=1e-2) 99 | criterion = MyLoss(alpha1=self.alpha1, alpha2=self.alpha2, margin=self.margin) 100 | 101 | scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1) 102 | early_stp = EarlyStopping(patience=3, verbose=False) 103 | 104 | for epoch in range(self.n_epoch): 105 | model.train() 106 | total_loss = 0 107 | total_dis = 0 108 | es_time = time.time() 109 | 110 | batch_cnt = 0 111 | for anchor, pos, neg in data_loader: 112 | anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device) 113 | embed_anchor, embed_pos, embed_neg, attn, dis = model(anchor, pos, neg) 114 | 115 | loss = criterion(embed_anchor, embed_pos, embed_neg, dis) 116 | 117 | total_loss += loss 118 | total_dis += dis.mean() 119 | 120 | optimizer.zero_grad() 121 | loss.backward() 122 | optimizer.step() 123 | batch_cnt += 1 124 | 125 | train_loss = total_loss / batch_cnt 126 | # dis = total_dis / batch_cnt 127 | est = time.time() - es_time 128 | 129 | if self.verbose and (epoch + 1) % 1 == 0: 130 | message = 'Epoch: [{:02}/{:02}] loss: {:.4f} Time: {:.2f}s'.format( 131 | epoch + 1, self.n_epoch, train_loss, est) 132 | print(message) 133 | scheduler.step() 134 | 135 | early_stp(train_loss, model) 136 | if early_stp.early_stop: 137 | model.load_state_dict(torch.load(early_stp.path)) 138 | if self.verbose: 139 | print("early stopping") 140 | break 141 | 142 | for anchor, pos, neg in test_loader: 143 | model.eval() 144 | anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device) 145 | _, _, _, attn, _ = model(anchor, pos, neg) 146 | 147 | attn_avg = torch.mean(attn, dim=0) 148 | attn_avg = attn_avg.data.cpu().numpy() 149 | return attn_avg 150 | 151 | def prepare_triplets(self, idx, nbr_indices): 152 | x = self.x 153 | y = self.y 154 | selector = MyHardSingleTripletSelector(nbrs_num=self.nbrs_num, rand_num=self.rand_num, 155 | nbr_indices=nbr_indices) 156 | dataset = SingleTripletDataset(idx, x, y, triplets_selector=selector) 157 | data_loader = Data.DataLoader(dataset, batch_size=self.batch_size, 
shuffle=True) 
158 |         test_loader = Data.DataLoader(dataset, batch_size=len(dataset)) 
159 |         return data_loader, test_loader 
160 | 
161 |     def prepare_nbrs(self): 
162 |         x = self.x.cpu().data.numpy() 
163 |         y = self.y.cpu().data.numpy() 
164 | 
165 |         anom_idx = np.where(y == 1)[0] 
166 |         x_anom = x[anom_idx] 
167 |         noml_idx = np.where(y == 0)[0] 
168 |         x_noml = x[noml_idx] 
169 |         n_neighbors = self.nbrs_num 
170 | 
171 |         nbrs_local = NearestNeighbors(n_neighbors=n_neighbors).fit(x_noml) 
172 |         tmp_indices = nbrs_local.kneighbors(x_anom)[1] 
173 | 
174 |         for idx in tmp_indices: 
175 |             nbr_indices = noml_idx[idx] 
176 |             self.normal_nbr_indices.append(nbr_indices) 
177 |         return 
178 | 
-------------------------------------------------------------------------------- 
/eval/evaluation_od.py: 
-------------------------------------------------------------------------------- 
1 | from sklearn.neighbors import LocalOutlierFactor 
2 | from pyod.models.iforest import IForest 
3 | from pyod.models.hbos import HBOS 
4 | from pyod.models.loda import LODA 
5 | from pyod.models.copod import COPOD 
6 | from tqdm import tqdm 
7 | import numpy as np 
8 | import pandas as pd 
9 | import os 
10 | import ast 
11 | import eval.evaluation_utils as utils 
12 | from sklearn import metrics 
13 | from config import eva_root 
14 | 
15 | 
16 | def evaluation_od_train(x, y, data_name, model_name="iforest", chosen_subspace=None): 
17 |     """ 
18 |     use an anomaly detector to yield an anomaly score for each subspace, 
19 |     generating two files: the subspace with the highest anomaly score for each anomaly & the anomaly score of every subspace 
20 |     :param x: data matrix 
21 |     :param y: class information 
22 |     :param data_name: the data set name, used for naming the ground truth file 
23 |     :param model_name: anomaly detector name, default: iforest 
24 |     :param chosen_subspace: use this to only evaluate a subset of the power set of the full feature space 
25 |     :return: df: a ground-truth map using anomaly idx as key and ground truth feature subspace as value.
26 |     """ 
27 | 
28 | 
29 |     dim = x.shape[1] 
30 |     ano_idx = np.where(y == 1)[0] 
31 |     n_ano = len(ano_idx) 
32 | 
33 |     # get all the possible feature subsets or just use the given subset list 
34 |     f_subsets = utils.get_subset_candidate(dim, chosen_subspace) 
35 | 
36 |     # score anomalies in each subspace, generate the score matrix 
37 |     n_subsets = len(f_subsets) 
38 |     score_matrix = np.zeros([n_ano, n_subsets]) 
39 |     for i in tqdm(range(n_subsets)): 
40 |         subset = f_subsets[i] 
41 |         x_subset = x[:, subset] 
42 | 
43 | 
44 |         if model_name == "iforest": 
45 |             clf = IForest() 
46 |             clf.fit(x_subset) 
47 |             od_score = clf.decision_scores_ 
48 |         elif model_name == "copod": 
49 |             clf = COPOD() 
50 |             clf.fit(x_subset) 
51 |             od_score = clf.decision_scores_ 
52 |         elif model_name == "hbos": 
53 |             clf = HBOS() 
54 |             clf.fit(x_subset) 
55 |             od_score = clf.decision_scores_ 
56 |         else: 
57 |             raise ValueError("unsupported od model") 
58 | 
59 |         od_score = utils.min_max_norm(od_score) 
60 |         score_matrix[:, i] = od_score[ano_idx] 
61 | 
62 |     if not os.path.exists(eva_root + "data_od_evaluation/"): 
63 |         os.makedirs(eva_root + "data_od_evaluation/") 
64 | 
65 |     # score matrix to df 
66 |     anomaly_score_df = pd.DataFrame(data=score_matrix, columns=[str(s) for s in f_subsets]) 
67 |     col_name = anomaly_score_df.columns.tolist() 
68 |     col_name.insert(0, 'ano_idx') 
69 |     anomaly_score_df["ano_idx"] = ano_idx 
70 |     anomaly_score_df = anomaly_score_df.reindex(columns=col_name) 
71 |     path1 = eva_root + "data_od_evaluation/" + data_name + "_score_" + model_name + ".csv" 
72 |     anomaly_score_df.to_csv(path1, index=False) 
73 | 
74 |     # get the ground truth (the subspace in which each anomaly obtains the highest anomaly score) 
75 |     g_truth_df = pd.DataFrame(columns=["ano_idx", "exp_subspace"]) 
76 | 
77 |     exp_subspaces = [] 
78 |     for ii, ano_score in enumerate(score_matrix): 
79 |         max_score_idx = int(np.argmax(ano_score)) 
80 |         exp_subset = str(f_subsets[max_score_idx]) 
81 |         exp_subspaces.append(exp_subset) 
82 |     g_truth_df["ano_idx"] = ano_idx 
83 |     g_truth_df["exp_subspace"] = exp_subspaces 
84 | 
85 |     g_truth_df = g_truth_df.astype({"exp_subspace": "object"}) 
86 |     path2 = eva_root + "data_od_evaluation/" + data_name + "_gt_" + model_name + ".csv" 
87 |     g_truth_df.to_csv(path2, index=False) 
88 |     return anomaly_score_df, g_truth_df 
89 | 
90 | 
91 | def evaluation_od(exp_subspace_list, x, y, data_name, model_name): 
92 |     """ 
93 |     use outlier detection to evaluate the explanation subspace of each anomaly data object, 
94 |     i.e., whether this subspace is a high-contrast subspace that highlights this anomaly 
95 |     (whether the anomaly detector gets a higher score in this space) 
96 |     :param exp_subspace_list: explanation feature subspace for each anomaly, corresponding to ano_idx 
97 |     :param x: data set 
98 |     :param y: label 
99 |     :param data_name: name of dataset 
100 |     :param model_name: the name of anomaly detector to generate ground truth 
101 |     :return: average precision, recall, and jaccard 
102 |     """ 
103 |     path1 = eva_root + "data_od_evaluation/" + data_name + "_gt_" + model_name + ".csv" 
104 |     if not os.path.exists(path1): 
105 |         print("annotation file not found, labeling now...") 
106 |         _, g_truth_df = evaluation_od_train(x, y, data_name, model_name) 
107 |     else: 
108 |         g_truth_df = pd.read_csv(path1) 
109 | 
110 |     ano_idx = np.where(y == 1)[0] 
111 | 
112 |     precision_list = np.zeros(len(ano_idx)) 
113 |     jaccard_list = np.zeros(len(ano_idx)) 
114 |     recall_list = np.zeros(len(ano_idx)) 
115 | 
116 |     for ii, ano in enumerate(ano_idx): 
117 |         exp_subspace =
list(exp_subspace_list[ii]) 
118 |         gt_subspace_str = g_truth_df.loc[g_truth_df["ano_idx"] == ano]["exp_subspace"].values[0] 
119 |         gt_subspace = ast.literal_eval(gt_subspace_str) 
120 | 
121 |         overlap = list(set(gt_subspace).intersection(set(exp_subspace))) 
122 |         union = list(set(gt_subspace).union(set(exp_subspace))) 
123 | 
124 |         precision_list[ii] = len(overlap) / len(exp_subspace) 
125 |         jaccard_list[ii] = len(overlap) / len(union) 
126 |         recall_list[ii] = len(overlap) / len(gt_subspace) 
127 | 
128 |     return precision_list.mean(), recall_list.mean(), jaccard_list.mean() 
129 | 
130 | 
131 | def evaluation_od_auc(feature_weight, x, y, data_name, model_name="iforest"): 
132 |     """ 
133 |     use the ground-truth subspace to evaluate the feature weights produced for each anomaly, 
134 |     treating the interpretation as a ranking of features and the ground-truth subspace as the relevant set 
135 |     :param feature_weight: feature weight vector of each anomaly, corresponding to ano_idx 
136 |     :param x: data set 
137 |     :param y: label 
138 |     :param data_name: name of dataset 
139 |     :param model_name: the name of anomaly detector to generate ground truth 
140 |     :return: mean AUPR and mean AUROC 
141 |     """ 
142 |     path1 = eva_root + "data_od_evaluation/" + data_name + "_gt_" + model_name + ".csv" 
143 |     if not os.path.exists(path1): 
144 |         print("annotation file not found, labeling now...") 
145 |         _, g_truth_df = evaluation_od_train(x, y, data_name, model_name) 
146 |     else: 
147 |         g_truth_df = pd.read_csv(path1) 
148 | 
149 |     ano_idx = np.where(y == 1)[0] 
150 |     dim = x.shape[1] 
151 | 
152 |     auroc_list = np.zeros(len(ano_idx)) 
153 |     aupr_list = np.zeros(len(ano_idx)) 
154 |     for ii, ano in enumerate(ano_idx): 
155 |         score = feature_weight[ii] 
156 | 
157 |         # ground_truth metrics 
158 |         gt_subspace_str = g_truth_df.loc[g_truth_df["ano_idx"] == ano]["exp_subspace"].values[0] 
159 |         gt_subspace = ast.literal_eval(gt_subspace_str) 
160 |         gt = np.zeros(dim, dtype=int) 
161 |         gt[gt_subspace] = 1 
162 | 
163 |         if len(gt_subspace) == dim: 
164 |             auroc_list[ii] = 1 
165 |             aupr_list[ii] = 1 
166 |         else: 
167 |             precision, recall, _ = metrics.precision_recall_curve(gt, score) 
168 |             aupr_list[ii] = metrics.auc(recall, precision) 
169 |             auroc_list[ii] = metrics.roc_auc_score(gt, score) 
170 | 
171 |     return aupr_list.mean(), auroc_list.mean() 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
-------------------------------------------------------------------------------- 
/model_aton/ATON.py: 
-------------------------------------------------------------------------------- 
1 | """ 
2 | This script implements an outlier interpretation method of the following paper: 
3 | "Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network". in WWW'21.
4 | @ Author: Hongzuo Xu 5 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 6 | """ 7 | 8 | from pyod.models import lscp 9 | import numpy as np 10 | import time, math 11 | import torch 12 | import torch.utils.data as Data 13 | import torch.optim as optim 14 | import torch.nn.functional as F 15 | 16 | from torch.optim import lr_scheduler 17 | from sklearn.neighbors import NearestNeighbors 18 | from sklearn import metrics 19 | from tqdm import tqdm 20 | from model_aton.utils import EarlyStopping, min_max_normalize 21 | 22 | from model_aton.datasets import MyHardSingleTripletSelector 23 | from model_aton.datasets import SingleTripletDataset 24 | from model_aton.networks import ATONnet, AttentionNet 25 | from model_aton.networks import MyLoss 26 | 27 | 28 | class ATON: 29 | def __init__(self, nbrs_num=30, rand_num=30, alpha1=0.8, alpha2=0.2, 30 | n_epoch=10, batch_size=64, lr=0.1, n_linear=64, margin=2., 31 | verbose=True, gpu=True): 32 | self.verbose = verbose 33 | 34 | self.x = None 35 | self.y = None 36 | self.ano_idx = None 37 | self.dim = None 38 | 39 | # a list of normal nbr of each anomaly 40 | self.normal_nbr_indices = [] 41 | 42 | cuda = torch.cuda.is_available() 43 | self.device = torch.device("cuda" if cuda and gpu else "cpu") 44 | if cuda: 45 | torch.cuda.set_device(0) 46 | print("device:", self.device) 47 | 48 | self.nbrs_num = nbrs_num 49 | self.rand_num = rand_num 50 | self.alpha1 = alpha1 51 | self.alpha2 = alpha2 52 | 53 | self.n_epoch = n_epoch 54 | self.batch_size = batch_size 55 | self.lr = lr 56 | self.n_linear = n_linear 57 | self.margin = margin 58 | return 59 | 60 | def fit(self, x, y): 61 | device = self.device 62 | 63 | self.dim = x.shape[1] 64 | x = min_max_normalize(x) 65 | self.ano_idx = np.where(y == 1)[0] 66 | 67 | self.x = torch.tensor(x, dtype=torch.float32).to(device) 68 | self.y = torch.tensor(y, dtype=torch.int64).to(device) 69 | self.prepare_nbrs() 70 | 71 | # train model for each anomaly 72 | attn_lst, W_lst = [], [] 73 | if self.verbose: 74 | iterator = range(len(self.ano_idx)) 75 | else: 76 | iterator = tqdm(range(len(self.ano_idx))) 77 | for ii in iterator: 78 | idx = self.ano_idx[ii] 79 | 80 | s_t = time.time() 81 | attn, W = self.interpret_ano(ii) 82 | attn_lst.append(attn) 83 | W_lst.append(W) 84 | 85 | if self.verbose: 86 | print("Ano_id:[{}], ({}/{}) \t time: {:.2f}s\n".format( 87 | idx, (ii + 1), len(self.ano_idx), 88 | (time.time() - s_t))) 89 | 90 | fea_weight_lst = [] 91 | for ii, idx in enumerate(self.ano_idx): 92 | attn, w = attn_lst[ii], W_lst[ii] 93 | fea_weight = np.zeros(self.dim) 94 | 95 | # attention (linear space) + w --> feature weight (original space) 96 | for j in range(len(attn)): 97 | fea_weight += attn[j] * abs(w[j]) 98 | fea_weight_lst.append(fea_weight) 99 | return fea_weight_lst 100 | 101 | def interpret_ano(self, ii): 102 | idx = self.ano_idx[ii] 103 | device = self.device 104 | dim = self.dim 105 | 106 | nbr_indices = self.normal_nbr_indices[ii] 107 | data_loader, test_loader = self.prepare_triplets(idx, nbr_indices) 108 | n_linear = self.n_linear 109 | attn_net = AttentionNet(in_feature=3 * n_linear, n_hidden=int(1.5 * n_linear), out_feature=n_linear) 110 | model = ATONnet(attn_net=attn_net, n_feature=dim, n_linear=n_linear) 111 | model.to(device) 112 | 113 | optimizer = optim.Adam(model.parameters(), lr=self.lr, weight_decay=1e-2) 114 | criterion = MyLoss(alpha1=self.alpha1, alpha2=self.alpha2, margin=self.margin) 115 | 116 | scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1) 117 | 
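# training loop below: minimize the triplet deviation loss; the scheduler decays
# the learning rate 10x every 5 epochs, and EarlyStopping (patience=3) halts
# training once the loss stops improving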
early_stp = EarlyStopping(patience=3, verbose=False) 118 | 119 | for epoch in range(self.n_epoch): 120 | model.train() 121 | total_loss = 0 122 | total_dis = 0 123 | es_time = time.time() 124 | 125 | batch_cnt = 0 126 | for anchor, pos, neg in data_loader: 127 | anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device) 128 | embed_anchor, embed_pos, embed_neg, attn, dis = model(anchor, pos, neg) 129 | loss = criterion(embed_anchor, embed_pos, embed_neg, dis) 130 | 131 | total_loss += loss 132 | total_dis += dis.mean() 133 | 134 | optimizer.zero_grad() 135 | loss.backward() 136 | optimizer.step() 137 | batch_cnt += 1 138 | 139 | train_loss = total_loss / batch_cnt 140 | est = time.time() - es_time 141 | 142 | if self.verbose and (epoch + 1) % 1 == 0: 143 | message = 'Epoch: [{:02}/{:02}] loss: {:.4f} Time: {:.2f}s'.format(epoch + 1, self.n_epoch, 144 | train_loss, est) 145 | print(message) 146 | scheduler.step() 147 | 148 | early_stp(train_loss, model) 149 | if early_stp.early_stop: 150 | model.load_state_dict(torch.load(early_stp.path)) 151 | if self.verbose: 152 | print("early stopping") 153 | break 154 | 155 | # distill W and attn from network 156 | for anchor, pos, neg in test_loader: 157 | model.eval() 158 | anchor, pos, neg = anchor.to(device), pos.to(device), neg.to(device) 159 | _, _, _, attn, _ = model(anchor, pos, neg) 160 | 161 | attn_avg = torch.mean(attn, dim=0) 162 | attn_avg = attn_avg.data.cpu().numpy() 163 | W = model.linear.weight.data.cpu().numpy() 164 | return attn_avg, W 165 | 166 | def prepare_triplets(self, idx, nbr_indices): 167 | x = self.x 168 | y = self.y 169 | selector = MyHardSingleTripletSelector(nbrs_num=self.nbrs_num, rand_num=self.rand_num, 170 | nbr_indices=nbr_indices) 171 | dataset = SingleTripletDataset(idx, x, y, triplets_selector=selector) 172 | data_loader = Data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True) 173 | test_loader = Data.DataLoader(dataset, batch_size=len(dataset)) 174 | return data_loader, test_loader 175 | 176 | def prepare_nbrs(self): 177 | x = self.x.cpu().data.numpy() 178 | y = self.y.cpu().data.numpy() 179 | 180 | anom_idx = np.where(y == 1)[0] 181 | x_anom = x[anom_idx] 182 | noml_idx = np.where(y == 0)[0] 183 | x_noml = x[noml_idx] 184 | n_neighbors = self.nbrs_num 185 | 186 | nbrs_local = NearestNeighbors(n_neighbors=n_neighbors).fit(x_noml) 187 | tmp_indices = nbrs_local.kneighbors(x_anom)[1] 188 | 189 | for idx in tmp_indices: 190 | nbr_indices = noml_idx[idx] 191 | self.normal_nbr_indices.append(nbr_indices) 192 | return 193 | -------------------------------------------------------------------------------- /model_aton/ATON_ablation3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time, math 3 | import torch 4 | import torch.utils.data as Data 5 | import torch.optim as optim 6 | import torch.nn.functional as F 7 | 8 | from torch.optim import lr_scheduler 9 | from sklearn import metrics 10 | from tqdm import tqdm 11 | from model_aton.utils import EarlyStopping, min_max_normalize 12 | 13 | from model_aton.datasets import MyHardSingleSelectorClf, SingleDataset 14 | from model_aton.networks import ATONabla3net, AttentionNet, ClassificationNet 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | class ATONabla3: 20 | def __init__(self, nbrs_num=30, rand_num=30, 21 | n_epoch=10, batch_size=64, lr=0.1, n_linear=64, margin=2., 22 | verbose=True, gpu=True): 23 | self.verbose = verbose 24 | 25 | self.x = None 26 | self.y 
= None 27 | self.ano_idx = None 28 | self.dim = None 29 | 30 | self.reason_map = {} 31 | 32 | cuda = torch.cuda.is_available() 33 | self.device = torch.device("cuda" if cuda and gpu else "cpu") 34 | if cuda: 35 | torch.cuda.set_device(0) 36 | print("device:", self.device) 37 | 38 | self.nbrs_num = nbrs_num 39 | self.rand_num = rand_num 40 | 41 | self.n_epoch = n_epoch 42 | self.batch_size = batch_size 43 | self.lr = lr 44 | self.n_linear = n_linear 45 | self.margin = margin 46 | return 47 | 48 | def fit(self, x, y): 49 | device = self.device 50 | self.dim = x.shape[1] 51 | x = min_max_normalize(x) 52 | self.ano_idx = np.where(y == 1)[0] 53 | 54 | self.x = torch.tensor(x, dtype=torch.float32).to(device) 55 | self.y = torch.tensor(y, dtype=torch.int64).to(device) 56 | 57 | # train model for each anomaly 58 | attn_lst, W_lst = [], [] 59 | if self.verbose: 60 | iterator = range(len(self.ano_idx)) 61 | else: 62 | iterator = tqdm(range(len(self.ano_idx))) 63 | for ii in iterator: 64 | idx = self.ano_idx[ii] 65 | 66 | s_t = time.time() 67 | attn, W = self.interpret_ano(idx) 68 | attn_lst.append(attn) 69 | W_lst.append(W) 70 | 71 | if self.verbose: 72 | print("ano_idx [{} ({})] attn: {}".format(ii, idx, attn)) 73 | print("Ano_id:[{}], ({}/{}) \t time: {:.2f}s\n".format( 74 | idx, (ii + 1), len(self.ano_idx), 75 | (time.time() - s_t))) 76 | 77 | fea_weight_lst = [] 78 | for ii, idx in enumerate(self.ano_idx): 79 | attn, w = attn_lst[ii], W_lst[ii] 80 | fea_weight = np.zeros(self.dim) 81 | 82 | # attention (linear space) + w --> feature weight (original space) 83 | for j in range(len(attn)): 84 | fea_weight += attn[j] * abs(w[j]) 85 | fea_weight_lst.append(fea_weight) 86 | return fea_weight_lst 87 | 88 | def interpret_ano(self, idx): 89 | device = self.device 90 | dim = self.dim 91 | 92 | data_loader, test_loader = self.prepare_triplets(idx) 93 | n_linear = self.n_linear 94 | attn_net = AttentionNet(in_feature=n_linear, n_hidden=int(1.5 * n_linear), out_feature=n_linear) 95 | clf_net = ClassificationNet(n_feature=n_linear) 96 | 97 | model = ATONabla3net(attn_net=attn_net, clf_net=clf_net, n_feature=dim, n_linear=n_linear) 98 | model.to(device) 99 | 100 | optimizer = optim.Adam(model.parameters(), lr=self.lr, weight_decay=1e-2) 101 | criterion_cel = torch.nn.CrossEntropyLoss() 102 | 103 | scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1) 104 | early_stp = EarlyStopping(patience=3, verbose=False) 105 | 106 | for epoch in range(self.n_epoch): 107 | model.train() 108 | total_loss = 0 109 | total_acc = 0 110 | es_time = time.time() 111 | 112 | batch_cnt = 0 113 | for batch_x, batch_y in data_loader: 114 | batch_x, batch_y = batch_x.to(device), batch_y.to(device) 115 | 116 | clf_out, attn = model(batch_x) 117 | loss = criterion_cel(clf_out, batch_y) 118 | 119 | _, y_pred = torch.max(F.softmax(clf_out, dim=1).data.cpu(), 1) 120 | clf_acc = metrics.accuracy_score(batch_y.cpu().data.numpy(), y_pred.cpu().data.numpy()) 121 | 122 | total_loss += loss 123 | total_acc += clf_acc 124 | 125 | optimizer.zero_grad() 126 | loss.backward() 127 | optimizer.step() 128 | batch_cnt += 1 129 | 130 | train_loss = total_loss / batch_cnt 131 | train_acc = total_acc / batch_cnt 132 | est = time.time() - es_time 133 | 134 | if self.verbose and (epoch + 1) % 1 == 0: 135 | print('Epoch: [{:02}/{:02}] loss: {:.4f} acc: {:.4f} Time: {:.2f}s' 136 | .format(epoch + 1, self.n_epoch, train_loss, train_acc, est)) 137 | scheduler.step() 138 | 139 | early_stp(train_loss, model) 140 | if early_stp.early_stop: 141 | 
model.load_state_dict(torch.load(early_stp.path)) 142 | if self.verbose: 143 | print("early stopping") 144 | break 145 | 146 | for x, target in test_loader: 147 | model.eval() 148 | x = x.to(device) 149 | _, attn = model(x) 150 | 151 | attn_avg = torch.mean(attn, dim=0) 152 | attn_avg = attn_avg.data.cpu().numpy() 153 | W = model.linear.weight.data.cpu().numpy() 154 | return attn_avg, W 155 | 156 | def prepare_triplets(self, idx): 157 | x = self.x 158 | y = self.y 159 | 160 | selector = MyHardSingleSelectorClf(nbrs_num=self.nbrs_num, rand_num=self.rand_num) 161 | dataset = SingleDataset(idx, x, y, data_selector=selector) 162 | 163 | data_loader = Data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True) 164 | test_loader = Data.DataLoader(dataset, batch_size=len(dataset)) 165 | return data_loader, test_loader 166 | 167 | def weight2subspace(self, weight, r=0.7, num=-1): 168 | threshold = r * np.sum(weight) 169 | tmp_s = 0 170 | exp_subspace = [] 171 | sorted_idx1 = np.argsort(weight) 172 | sorted_idx = [sorted_idx1[self.dim - i -1] for i in range(self.dim)] 173 | if num != -1: 174 | exp_subspace = sorted_idx[:num] 175 | exp_subspace = list(np.sort(exp_subspace)) 176 | return exp_subspace 177 | 178 | for idx in sorted_idx: 179 | tmp_s += weight[idx] 180 | exp_subspace.append(idx) 181 | if tmp_s >= threshold: 182 | break 183 | exp_subspace = list(np.sort(exp_subspace)) 184 | return exp_subspace 185 | 186 | def weight2subspace_pn(self, weight): 187 | exp_subspace = [] 188 | for i in range(len(weight)): 189 | if weight[i] > 0: 190 | exp_subspace.append(i) 191 | if len(exp_subspace) == 0: 192 | exp_subspace = np.arange(len(weight)) 193 | exp_subspace = list(np.sort(exp_subspace)) 194 | return exp_subspace 195 | 196 | def get_exp_subspace(self, fea_weight_lst, w2s_ratio, real_exp_len=None): 197 | exp_subspace_lst = [] 198 | for ii, idx in enumerate(self.ano_idx): 199 | fea_weight = fea_weight_lst[ii] 200 | if w2s_ratio == "real_len": 201 | exp_subspace_lst.append(self.weight2subspace(fea_weight, num=real_exp_len[ii])) 202 | elif w2s_ratio == "auto": 203 | r = math.sqrt(2 / self.dim) 204 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=r)) 205 | elif w2s_ratio == "pn": 206 | exp_subspace_lst.append(self.weight2subspace_pn(fea_weight)) 207 | else: 208 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=w2s_ratio)) 209 | return exp_subspace_lst 210 | -------------------------------------------------------------------------------- /model_aton/datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script implements an outlier interpretation method of the following paper: 3 | "Beyond Outlier Detection: Outlier Interpretation by Attention-Guided Triplet Deviation Network". in WWW'21. 
4 | @ Author: Hongzuo Xu 5 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 6 | """ 7 | 8 | 9 | import numpy as np 10 | import torch 11 | from torch.utils.data import Dataset 12 | from sklearn.neighbors import NearestNeighbors 13 | 14 | 15 | class SingleTripletDataset(Dataset): 16 | def __init__(self, anom_idx, x, y, triplets_selector, transform=None): 17 | self.transform = transform 18 | self.data = x 19 | self.triplets = triplets_selector.get_triplets(anom_idx, x, y) 20 | 21 | def __getitem__(self, index): 22 | a_idx, p_idx, n_idx = self.triplets[index] 23 | anchor, positive, negative = self.data[a_idx], self.data[p_idx], self.data[n_idx] 24 | if self.transform is not None: 25 | anchor = self.transform(anchor) 26 | positive = self.transform(positive) 27 | negative = self.transform(negative) 28 | return anchor, positive, negative 29 | 30 | def __len__(self): 31 | return len(self.triplets) 32 | 33 | 34 | class SingleDataset(Dataset): 35 | def __init__(self, anom_idx, x, y, data_selector, transform=None): 36 | self.transform = transform 37 | self.selected_data = data_selector.get_data(anom_idx, x, y) 38 | 39 | def __getitem__(self, index): 40 | data = self.selected_data[0][index] 41 | target = self.selected_data[1][index] 42 | if self.transform is not None: 43 | data = self.transform(data) 44 | return data, target 45 | 46 | def __len__(self): 47 | return len(self.selected_data[0]) 48 | 49 | 50 | class SingleTripletDatasetClf(Dataset): 51 | def __init__(self, anom_idx, x, y, triplets_selector, transform=None): 52 | self.transform = transform 53 | self.data = x 54 | self.triplets, self.targets = triplets_selector.get_triplets(anom_idx, x, y) 55 | 56 | def __getitem__(self, index): 57 | a_idx, p_idx, n_idx = self.triplets[index] 58 | a_target, p_target, n_target = self.targets[index] 59 | anchor, positive, negative = self.data[a_idx], self.data[p_idx], self.data[n_idx] 60 | if self.transform is not None: 61 | anchor = self.transform(anchor) 62 | positive = self.transform(positive) 63 | negative = self.transform(negative) 64 | return anchor, positive, negative, a_target, p_target, n_target 65 | 66 | def __len__(self): 67 | return len(self.triplets) 68 | 69 | 70 | class MyHardSingleTripletSelector: 71 | def __init__(self, nbrs_num, rand_num, nbr_indices): 72 | self.x = None 73 | self.y = None 74 | self.nbrs_num = nbrs_num 75 | self.rand_num = rand_num 76 | self.nbr_indices = nbr_indices 77 | 78 | def get_triplets(self, anom_idx, x, y, normal_label=0): 79 | self.x = x.cpu().data.numpy() 80 | self.y = y.cpu().data.numpy() 81 | 82 | # anom_x = self.x[anom_idx] 83 | # x_noml = self.x[noml_idx] 84 | # n_neighbors = self.nbrs_num 85 | # nbrs_local = NearestNeighbors(n_neighbors=n_neighbors).fit(x_noml) 86 | # nbr_indices = noml_idx[nbrs_local.kneighbors([anom_x])[1].flatten()] 87 | 88 | noml_idx = np.where(self.y == normal_label)[0] 89 | nbr_indices = self.nbr_indices 90 | rand_num = self.rand_num 91 | 92 | rand_canddt = np.setdiff1d(noml_idx, nbr_indices) 93 | rand_indices = np.random.choice(rand_canddt, rand_num, replace=False) 94 | 95 | triplets = [[anchor, positive, anom_idx] 96 | for anchor in rand_indices 97 | for positive in nbr_indices] 98 | return torch.LongTensor(np.array(triplets)) 99 | 100 | 101 | class MyHardSingleSelectorClf: 102 | def __init__(self, nbrs_num, rand_num): 103 | self.nbrs_num = nbrs_num 104 | self.rand_num = rand_num 105 | 106 | def get_data(self, anom_idx, x, y, normal_label=0): 107 | x = x.cpu().data.numpy() 108 | y = y.cpu().data.numpy() 
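        # build a small binary classification set around this anomaly: the anomaly
        # plus slightly perturbed copies of it (label 1) versus its nearest normal
        # neighbours and randomly sampled normal points (label 0)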
109 | 
110 |         anom_x = x[anom_idx] 
111 |         noml_idx = np.where(y == normal_label)[0] 
112 |         x_noml = x[noml_idx] 
113 | 
114 |         nbrs_local = NearestNeighbors(n_neighbors=self.nbrs_num).fit(x_noml) 
115 |         nbr_indices = noml_idx[nbrs_local.kneighbors([anom_x])[1].flatten()] 
116 |         rand_canddt = np.setdiff1d(noml_idx, nbr_indices) 
117 |         rand_indices = np.random.choice(rand_canddt, self.rand_num, replace=False) 
118 | 
119 |         # perturbation to augment 
120 |         dim = x.shape[1] 
121 |         anom_lst = [] 
122 |         anom_lst.append(anom_x) 
123 |         for i in range(self.rand_num + self.nbrs_num - 1): 
124 |             new_anom_x = anom_x.copy() 
125 |             choose_f = np.random.choice(np.arange(dim), 3) 
126 |             for a in choose_f: 
127 |                 new_anom_x[a] = anom_x[a] * 1.01 
128 |             anom_lst.append(new_anom_x) 
129 | 
130 |         data_idx = np.hstack([rand_indices, nbr_indices]) 
131 |         norm_data = x[data_idx] 
132 |         data = np.vstack([np.array(anom_lst), norm_data]) 
133 |         target = np.hstack([np.ones(len(anom_lst), dtype=int), np.zeros(len(rand_indices), dtype=int), np.zeros(len(nbr_indices), dtype=int)])  # one label per anomaly copy, zero per normal sample 
134 | 
135 |         return torch.FloatTensor(data), torch.LongTensor(target) 
136 | 
137 | 
138 | class MyHardSingleTripletSelectorClf: 
139 |     def __init__(self, nbrs_num, rand_num): 
140 |         self.x = None 
141 |         self.y = None 
142 |         self.nbrs_num = nbrs_num 
143 |         self.rand_num = rand_num 
144 | 
145 |     def get_triplets(self, anom_idx, x, y, normal_label=0): 
146 |         self.x = x.cpu().data.numpy() 
147 |         self.y = y.cpu().data.numpy() 
148 | 
149 |         anom_x = self.x[anom_idx] 
150 |         noml_idx = np.where(self.y == normal_label)[0] 
151 |         x_noml = self.x[noml_idx] 
152 |         n_neighbors = self.nbrs_num 
153 |         rand_num = self.rand_num 
154 | 
155 |         nbrs_local = NearestNeighbors(n_neighbors=n_neighbors).fit(x_noml) 
156 | 
157 |         nbr_indices = noml_idx[nbrs_local.kneighbors([anom_x])[1].flatten()] 
158 |         # nbr_dist = nbrs_local.kneighbors([anom_x])[0].flatten() 
159 | 
160 |         rand_canddt = np.setdiff1d(noml_idx, nbr_indices) 
161 |         rand_indices = np.random.choice(rand_canddt, rand_num, replace=False) 
162 | 
163 |         triplets = [[anchor, positive, anom_idx] 
164 |                     for anchor in rand_indices 
165 |                     for positive in nbr_indices] 
166 | 
167 |         # print("Generate triplets Num: [%d]" % len(triplets)) 
168 |         target = [[0, 0, 1]] * len(triplets) 
169 | 
170 |         return torch.LongTensor(np.array(triplets)), torch.LongTensor(np.array(target)) 
171 | 
172 | 
173 | class MyHardSingleTripletSelector2: 
174 |     def __init__(self, nbrs_num, rand_num): 
175 |         self.x = None 
176 |         self.y = None 
177 |         self.nbrs_num = nbrs_num 
178 |         self.rand_num = rand_num 
179 | 
180 |     def get_triplets(self, anom_idx, x, y, normal_label=0): 
181 |         self.x = x.cpu().data.numpy() 
182 |         self.y = y.cpu().data.numpy() 
183 | 
184 |         n_neighbors = self.nbrs_num 
185 |         rand_num = self.rand_num 
186 | 
187 |         anom_x = self.x[anom_idx] 
188 | 
189 |         anom_indices = np.where(self.y != normal_label)[0] 
190 |         noml_indices = np.where(self.y == normal_label)[0] 
191 |         noml_x = self.x[noml_indices] 
192 | 
193 |         nbrs_local = NearestNeighbors(n_neighbors=n_neighbors).fit(noml_x) 
194 |         nbr_indices = noml_indices[nbrs_local.kneighbors([anom_x])[1].flatten()] 
195 |         # nbr_dist = nbrs_local.kneighbors([anom_x])[0].flatten() 
196 | 
197 |         rand_canddt_nor = np.setdiff1d(noml_indices, nbr_indices) 
198 |         rand_nor_indices = np.random.choice(rand_canddt_nor, rand_num, replace=False) 
199 | 
200 |         triplets1 = [[anchor, positive, anom_idx] 
201 |                      for anchor in rand_nor_indices 
202 |                      for positive in nbr_indices] 
203 | 
204 |         rand_canddt_ano = np.setdiff1d(anom_indices, anom_idx) 
205 |         if len(rand_canddt_ano) < rand_num: 
206 |             rand_ano_indices =
rand_canddt_ano 
207 |         else: 
208 |             rand_ano_indices = np.random.choice(rand_canddt_ano, rand_num, replace=False) 
209 | 
210 |         triplets2 = [[anchor, anom_idx, negative] 
211 |                      for anchor in rand_ano_indices 
212 |                      for negative in nbr_indices] 
213 |         triplets = triplets1 + triplets2 
214 | 
215 |         # print("Generate triplets Num: [%d]" % len(triplets)) 
216 |         target1 = [[0, 0, 1]] * len(triplets1) 
217 |         target2 = [[1, 1, 0]] * len(triplets2) 
218 |         target = target1 + target2 
219 | 
220 |         return torch.LongTensor(np.array(triplets)), torch.LongTensor(np.array(target)) 
221 | 
222 | 
-------------------------------------------------------------------------------- 
/main.py: 
-------------------------------------------------------------------------------- 
1 | """ 
2 | this script runs the outlier interpretation methods ATON, COIN, SHAP, LIME, and IntGrad; 
3 | these methods use feature weights as interpretations 
4 | 
5 | @ Author: Hongzuo Xu 
6 | @ email: hongzuo.xu@gmail.com or leogarcia@126.com or xuhongzuo13@nudt.edu.cn 
7 | """ 
8 | 
9 | import os 
10 | import ast 
11 | import glob 
12 | import time, datetime 
13 | import argparse 
14 | import pandas as pd 
15 | import numpy as np 
16 | from prettytable import PrettyTable 
17 | 
18 | from model_aton.ATON import ATON 
19 | from model_aton.ATON_ablation import ATONabla 
20 | from model_aton.ATON_ablation2 import ATONabla2 
21 | from model_aton.ATON_ablation3 import ATONabla3 
22 | from model_iml.SHAP import SHAP 
23 | from model_iml.LIME import LIME 
24 | from model_coin.COIN import COIN 
25 | # from model_iml.IntGrad import IntGrad 
26 | 
27 | from utils import model_utils 
28 | from utils.eval_print_utils import print_eval_runs 
29 | from eval.evaluation_od import evaluation_od, evaluation_od_auc 
30 | from config import root, eva_root, get_parser 
31 | import warnings 
32 | 
33 | warnings.filterwarnings("ignore") 
34 | 
35 | # ------------------- parser ----------------- # 
36 | algorithm_name = "aton" 
37 | parser = argparse.ArgumentParser() 
38 | parser.add_argument('--gpu', type=ast.literal_eval, default=True) 
39 | parser.add_argument('--eval', type=ast.literal_eval, default=True, help='Evaluate the interpretation results or not') 
40 | parser.add_argument('--path', type=str, default="data/", help='the input data path, can be a single csv ' 
41 |                                                               'or a data folder') 
42 | parser.add_argument('--w2s_ratio', type=str, default='real_len', help='\'real-len\', \'auto\', \'pn\', or a ratio.') 
43 | parser.add_argument('--runs', type=int, default=1) 
44 | parser.add_argument('--record_name', type=str, default='') 
45 | parser = get_parser(algorithm_name, parser) 
46 | args = parser.parse_args() 
47 | 
48 | input_root_list = [root + args.path] 
49 | w2s_ratio = args.w2s_ratio 
50 | od_eval_model = ["iforest", "copod", "hbos"]  # we obtain ground-truth annotations using three outlier detection methods 
51 | runs = args.runs 
52 | record_name = args.record_name 
53 | 
54 | # ------------------- record ----------------- # 
55 | if not os.path.exists("record/" + algorithm_name): 
56 |     os.makedirs("record/" + algorithm_name) 
57 | if not os.path.exists("checkpoints"): 
58 |     os.makedirs("checkpoints/") 
59 | record_path = "record/" + algorithm_name + "/zout." + \ 
60 |                algorithm_name + "."
+ record_name + ".txt" 
61 | doc = open(record_path, 'a') 
62 | tab1 = PrettyTable(["parameter", "value"]) 
63 | tab1.add_row(["@ data", str(input_root_list)]) 
64 | tab1.add_row(["@ algorithm_name", str(algorithm_name)]) 
65 | tab1.add_row(["@ w2s_ratio", str(w2s_ratio)]) 
66 | tab1.add_row(["@ runs", str(runs)]) 
67 | tab1.add_row(["@ od_eval_model", str(od_eval_model)]) 
68 | tab1.add_row(["@ start_time", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")]) 
69 | for k in list(vars(args).keys()): 
70 |     tab1.add_row([k, vars(args)[k]]) 
71 | print(tab1, file=doc) 
72 | print(tab1) 
73 | doc.close() 
74 | time.sleep(0.2) 
75 | 
76 | 
77 | def main(path, run_times): 
78 |     print("eval:", args.eval) 
79 |     print("gpu :", args.gpu) 
80 |     data_name = path.split("/")[-1].split(".")[0] 
81 | 
82 |     # remove the prefix index number of the data set name so that we can match the annotation file 
83 |     data_name = data_name[3:] 
84 | 
85 |     print("# ------------------ %s ------------------ # " % data_name) 
86 | 
87 |     df = pd.read_csv(path) 
88 |     X = df.values[:, :-1] 
89 |     y = np.array(df.values[:, -1], dtype=int) 
90 | 
91 |     # get the real length of the ground-truth interpretation when w2s_ratio is 'real_len' 
92 |     real_len_lst = [] 
93 |     runs_metric_lst = [[] for k in range(len(od_eval_model))] 
94 |     if args.eval and args.w2s_ratio == "real_len": 
95 |         gt_lst = [] 
96 |         for eval_m in od_eval_model: 
97 |             folder = eva_root + "data_od_evaluation/" 
98 |             gt_path = os.path.join(folder, data_name + "_gt_" + eval_m + ".csv") 
99 |             if len(glob.glob(gt_path)) == 0: 
100 |                 raise FileNotFoundError("no such gt file:" + gt_path) 
101 |             gt_str = pd.read_csv(gt_path)["exp_subspace"].values 
102 |             gt_lst.append([ast.literal_eval(gtt) for gtt in gt_str]) 
103 | 
104 |         for gt in gt_lst: 
105 |             real_len_lst.append([len(gtt) for gtt in gt]) 
106 | 
107 |     t = 0 
108 |     for i in range(run_times): 
109 |         print("runs: %d" % (i + 1)) 
110 |         time1 = time.time() 
111 | 
112 |         # ------------ run the chosen algorithm to get interpretation (feature weight) ------------- # 
113 |         fea_weight_lst = run_model(algorithm_name, X, y) 
114 | 
115 |         # ------------------- transfer feature weight to subspace ----------------- # 
116 |         subspace_outputs = [] 
117 |         if args.eval: 
118 |             for j in range(len(od_eval_model)): 
119 |                 if w2s_ratio == "real_len": 
120 |                     real_len = real_len_lst[j] 
121 |                     subspace = model_utils.get_exp_subspace(fea_weight_lst, w2s_ratio=w2s_ratio, real_exp_len=real_len) 
122 |                 else: 
123 |                     subspace = model_utils.get_exp_subspace(fea_weight_lst, w2s_ratio=w2s_ratio) 
124 |                 subspace_outputs.append(subspace) 
125 | 
126 |         t = time.time() - time1 
127 | 
128 |         # ---------------------- evaluation -------------------------- # 
129 |         if args.eval: 
130 |             for mm, eval_model in enumerate(od_eval_model): 
131 |                 p, r, j = evaluation_od(subspace_outputs[mm], X, y, data_name, eval_model) 
132 |                 aupr, auroc = evaluation_od_auc(fea_weight_lst, X, y, data_name, model_name=eval_model) 
133 |                 metric_lst = [p, r, j, aupr, auroc, t]  # order matches the unpacking in print_eval_runs 
134 |                 runs_metric_lst[mm].append(metric_lst) 
135 |                 print("data: {}, eval_model: {}, {}".format(path.split("/")[-1].split(".")[0], eval_model, metric_lst)) 
136 | 
137 |     if args.eval: 
138 |         name = path.split("/")[-1].split(".")[0] 
139 |         for mm in range(len(od_eval_model)): 
140 |             txt = print_eval_runs(runs_metric_lst[mm], data_name=name, algo_name=algorithm_name) 
141 |             print(txt) 
142 | 
143 |             doc = open(record_path, 'a') 
144 |             print(txt, file=doc) 
145 |             doc.close() 
146 |     else: 
147 |         txt = data_name + "," + str(round(t, 2)) + "," + algorithm_name 
148 |         print(txt) 
149 |         doc =
open(record_path, 'a') 150 | print(txt, file=doc) 151 | doc.close() 152 | return 153 | 154 | 155 | def run_model(algorithm, X, y): 156 | if algorithm == "aton": 157 | model = ATON(verbose=False, gpu=args.gpu, 158 | nbrs_num=args.nbrs_num, rand_num=args.rand_num, 159 | alpha1=args.alpha1, alpha2=args.alpha2, 160 | n_epoch=args.n_epoch, batch_size=args.batch_size, lr=args.lr, 161 | n_linear=args.n_linear, margin=args.margin) 162 | fea_weight_lst = model.fit(X, y) 163 | 164 | elif algorithm == "aton_ablation": 165 | model = ATONabla(verbose=False, 166 | nbrs_num=args.nbrs_num, rand_num=args.rand_num, n_epoch=args.n_epoch, 167 | batch_size=args.batch_size, lr=args.lr, n_linear=args.n_linear, margin=args.margin) 168 | fea_weight_lst = model.fit(X, y) 169 | 170 | elif algorithm == "aton_ablation2": 171 | model = ATONabla2(verbose=False, 172 | nbrs_num=args.nbrs_num, rand_num=args.rand_num, n_epoch=args.n_epoch, 173 | batch_size=args.batch_size, lr=args.lr, margin=args.margin) 174 | fea_weight_lst = model.fit(X, y) 175 | 176 | elif algorithm == "aton_ablation3": 177 | model = ATONabla3(verbose=False, gpu=True, 178 | nbrs_num=args.nbrs_num, rand_num=args.rand_num, n_epoch=args.n_epoch, 179 | batch_size=args.batch_size, lr=args.lr, n_linear=args.n_linear, margin=args.margin) 180 | fea_weight_lst = model.fit(X, y) 181 | 182 | elif algorithm == "shap": 183 | model = SHAP(kernel=args.kernel, n_sample=args.n_sample, threshold=args.threshold) 184 | fea_weight_lst = model.fit(X, y) 185 | 186 | elif algorithm == "lime": 187 | model = LIME(discretize_continuous=args.discretize_continuous, discretizer=args.discretizer) 188 | fea_weight_lst = model.fit(X, y) 189 | 190 | # elif algorithm == "intgrad": 191 | # model = IntGrad(n_steps=args.n_steps, method=args.method) 192 | # fea_weight_lst = model.fit(X, y) 193 | 194 | elif algorithm == "coin": 195 | sgnf_prior = 1 # a scalar prior is expanded to a uniform attribute-significance vector inside COIN 196 | model = COIN(X, y, args.ratio_nbr, AUG=args.AUG, MIN_CLUSTER_SIZE=args.MIN_CLUSTER_SIZE, 197 | MAX_NUM_CLUSTER=args.MAX_NUM_CLUSTER, VAL_TIMES=args.VAL_TIMES, 198 | C_SVM=args.C_SVM, THRE_PS=args.THRE_PS, DEFK=args.DEFK) 199 | fea_weight_lst = model.fit(sgnf_prior) 200 | else: 201 | raise NotImplementedError("algorithm not implemented: %s" % algorithm) 202 | return fea_weight_lst 203 | 204 | 205 | if __name__ == '__main__': 206 | for input_root in input_root_list: 207 | if os.path.isdir(input_root): 208 | for file_name in sorted(os.listdir(input_root)): 209 | if file_name.endswith(".csv"): 210 | input_path = str(os.path.join(input_root, file_name)) 211 | main(input_path, runs) 212 | 213 | else: 214 | input_path = input_root 215 | main(input_path, runs) 216 | -------------------------------------------------------------------------------- /data/01-vertebral.csv: -------------------------------------------------------------------------------- 1 | A0,A1,A2,A3,A4,A5,class 2 | 63.03,22.55,39.61,40.48,98.67,-0.25,0.0 3 | 39.06,10.06,25.02,29.0,114.41,4.56,0.0 4 | 68.83,22.22,50.09,46.61,105.99,-3.53,0.0 5 | 69.3,24.65,44.31,44.64,101.87,11.21,0.0 6 | 49.71,9.65,28.32,40.06,108.17,7.92,0.0 7 | 40.25,13.92,25.12,26.33,130.33,2.23,0.0 8 | 53.43,15.86,37.17,37.57,120.57,5.99,0.0 9 | 45.37,10.76,29.04,34.61,117.27,-10.68,0.0 10 | 43.79,13.53,42.69,30.26,125.0,13.29,0.0 11 | 36.69,5.01,41.95,31.68,84.24,0.66,0.0 12 | 49.71,13.04,31.33,36.67,108.65,-7.83,0.0 13 | 31.23,17.72,15.5,13.52,120.06,0.5,0.0 14 | 48.92,19.96,40.26,28.95,119.32,8.03,0.0 15 | 53.57,20.46,33.1,33.11,110.97,7.04,0.0 16 | 57.3,24.19,47.0,33.11,116.81,5.77,0.0 17 | 
44.32,12.54,36.1,31.78,124.12,5.42,0.0 18 | 63.83,20.36,54.55,43.47,112.31,-0.62,0.0 19 | 31.28,3.14,32.56,28.13,129.01,3.62,0.0 20 | 38.7,13.44,31.0,25.25,123.16,1.43,0.0 21 | 41.73,12.25,30.12,29.48,116.59,-1.24,0.0 22 | 43.92,14.18,37.83,29.74,134.46,6.45,0.0 23 | 54.92,21.06,42.2,33.86,125.21,2.43,0.0 24 | 63.07,24.41,54.0,38.66,106.42,15.78,0.0 25 | 45.54,13.07,30.3,32.47,117.98,-4.99,0.0 26 | 36.13,22.76,29.0,13.37,115.58,-3.24,0.0 27 | 54.12,26.65,35.33,27.47,121.45,1.57,0.0 28 | 26.15,10.76,14.0,15.39,125.2,-10.09,0.0 29 | 43.58,16.51,47.0,27.07,109.27,8.99,0.0 30 | 44.55,21.93,26.79,22.62,111.07,2.65,0.0 31 | 66.88,24.89,49.28,41.99,113.48,-2.01,0.0 32 | 50.82,15.4,42.53,35.42,112.19,10.87,0.0 33 | 46.39,11.08,32.14,35.31,98.77,6.39,0.0 34 | 44.94,17.44,27.78,27.49,117.98,5.57,0.0 35 | 38.66,12.99,40.0,25.68,124.91,2.7,0.0 36 | 59.6,32.0,46.56,27.6,119.33,1.47,0.0 37 | 31.48,7.83,24.28,23.66,113.83,4.39,0.0 38 | 32.09,6.99,36.0,25.1,132.26,6.41,0.0 39 | 35.7,19.44,20.7,16.26,137.54,-0.26,0.0 40 | 55.84,28.85,47.69,27.0,123.31,2.81,0.0 41 | 52.42,19.01,35.87,33.41,116.56,1.69,0.0 42 | 35.49,11.7,15.59,23.79,106.94,-3.46,0.0 43 | 46.44,8.4,29.04,38.05,115.48,2.05,0.0 44 | 53.85,19.23,32.78,34.62,121.67,5.33,0.0 45 | 66.29,26.33,47.5,39.96,121.22,-0.8,0.0 46 | 56.03,16.3,62.28,39.73,114.02,-2.33,0.0 47 | 50.91,23.02,47.0,27.9,117.42,-2.53,0.0 48 | 48.33,22.23,36.18,26.1,117.38,6.48,0.0 49 | 41.35,16.58,30.71,24.78,113.27,-4.5,0.0 50 | 40.56,17.98,34.0,22.58,121.05,-1.54,0.0 51 | 41.77,17.9,20.03,23.87,118.36,2.06,0.0 52 | 55.29,20.44,34.0,34.85,115.88,3.56,0.0 53 | 74.43,41.56,27.7,32.88,107.95,5.0,0.0 54 | 50.21,29.76,36.1,20.45,128.29,5.74,0.0 55 | 30.15,11.92,34.0,18.23,112.68,11.46,0.0 56 | 41.17,17.32,33.47,23.85,116.38,-9.57,0.0 57 | 47.66,13.28,36.68,34.38,98.25,6.27,0.0 58 | 43.35,7.47,28.07,35.88,112.78,5.75,0.0 59 | 46.86,15.35,38.0,31.5,116.25,1.66,0.0 60 | 43.2,19.66,35.0,23.54,124.85,-2.92,0.0 61 | 48.11,14.93,35.56,33.18,124.06,7.95,0.0 62 | 74.38,32.05,78.77,42.32,143.56,56.13,0.0 63 | 89.68,32.7,83.13,56.98,129.96,92.03,0.0 64 | 44.53,9.43,52.0,35.1,134.71,29.11,0.0 65 | 77.69,21.38,64.43,56.31,114.82,26.93,0.0 66 | 76.15,21.94,82.96,54.21,123.93,10.43,0.0 67 | 83.93,41.29,62.0,42.65,115.01,26.59,0.0 68 | 78.49,22.18,60.0,56.31,118.53,27.38,0.0 69 | 75.65,19.34,64.15,56.31,95.9,69.55,0.0 70 | 72.08,18.95,51.0,53.13,114.21,1.01,0.0 71 | 58.6,-0.26,51.5,58.86,102.04,28.06,0.0 72 | 72.56,17.39,52.0,55.18,119.19,32.11,0.0 73 | 86.9,32.93,47.79,53.97,135.08,101.72,0.0 74 | 84.97,33.02,60.86,51.95,125.66,74.33,0.0 75 | 55.51,20.1,44.0,35.42,122.65,34.55,0.0 76 | 72.22,23.08,91.0,49.14,137.74,56.8,0.0 77 | 70.22,39.82,68.12,30.4,148.53,145.38,0.0 78 | 86.75,36.04,69.22,50.71,139.41,110.86,0.0 79 | 58.78,7.67,53.34,51.12,98.5,51.58,0.0 80 | 67.41,17.44,60.14,49.97,111.12,33.16,0.0 81 | 47.74,12.09,39.0,35.66,117.51,21.68,0.0 82 | 77.11,30.47,69.48,46.64,112.15,70.76,0.0 83 | 74.01,21.12,57.38,52.88,120.21,74.56,0.0 84 | 88.62,29.09,47.56,59.53,121.76,51.81,0.0 85 | 81.1,24.79,77.89,56.31,151.84,65.21,0.0 86 | 76.33,42.4,57.2,33.93,124.27,50.13,0.0 87 | 45.44,9.91,45.0,35.54,163.07,20.32,0.0 88 | 59.79,17.88,59.21,41.91,119.32,22.12,0.0 89 | 44.91,10.22,44.63,34.7,130.08,37.36,0.0 90 | 56.61,16.8,42.0,39.81,127.29,24.02,0.0 91 | 71.19,23.9,43.7,47.29,119.86,27.28,0.0 92 | 81.66,28.75,58.23,52.91,114.77,30.61,0.0 93 | 70.95,20.16,62.86,50.79,116.18,32.52,0.0 94 | 85.35,15.84,71.67,69.51,124.42,76.02,0.0 95 | 58.1,14.84,79.65,43.26,113.59,50.24,0.0 96 | 
94.17,15.38,67.71,78.79,114.89,53.26,0.0 97 | 57.52,33.65,50.91,23.88,140.98,148.75,0.0 98 | 96.66,19.46,90.21,77.2,120.67,64.08,0.0 99 | 74.72,19.76,82.74,54.96,109.36,33.31,0.0 100 | 77.66,22.43,93.89,55.22,123.06,61.21,0.0 101 | 58.52,13.92,41.47,44.6,115.51,30.39,0.0 102 | 84.59,30.36,65.48,54.22,108.01,25.12,0.0 103 | 79.94,18.77,63.31,61.16,114.79,38.54,0.0 104 | 70.4,13.47,61.2,56.93,102.34,25.54,0.0 105 | 49.78,6.47,53.0,43.32,110.86,25.34,0.0 106 | 77.41,29.4,63.23,48.01,118.45,93.56,0.0 107 | 65.01,27.6,50.95,37.41,116.58,7.02,0.0 108 | 65.01,9.84,57.74,55.18,94.74,49.7,0.0 109 | 78.43,33.43,76.28,45.0,138.55,77.16,0.0 110 | 63.17,6.33,63.0,56.84,110.64,42.61,0.0 111 | 68.61,15.08,63.01,53.53,123.43,39.5,0.0 112 | 63.9,13.71,62.12,50.19,114.13,41.42,0.0 113 | 85.0,29.61,83.35,55.39,126.91,71.32,0.0 114 | 42.02,-6.55,67.9,48.58,111.59,27.34,0.0 115 | 69.76,19.28,48.5,50.48,96.49,51.17,0.0 116 | 80.99,36.84,86.96,44.14,141.09,85.87,0.0 117 | 129.83,8.4,48.38,121.43,107.69,418.54,0.0 118 | 70.48,12.49,62.42,57.99,114.19,56.9,0.0 119 | 86.04,38.75,47.87,47.29,122.09,61.99,0.0 120 | 65.54,24.16,45.78,41.38,136.44,16.38,0.0 121 | 60.75,15.75,43.2,45.0,113.05,31.69,0.0 122 | 54.74,12.1,41.0,42.65,117.64,40.38,0.0 123 | 83.88,23.08,87.14,60.8,124.65,80.56,0.0 124 | 80.07,48.07,52.4,32.01,110.71,67.73,0.0 125 | 65.67,10.54,56.49,55.12,109.16,53.93,0.0 126 | 74.72,14.32,32.5,60.4,107.18,37.02,0.0 127 | 48.06,5.69,57.06,42.37,95.44,32.84,0.0 128 | 70.68,21.7,59.18,48.97,103.01,27.81,0.0 129 | 80.43,17.0,66.54,63.43,116.44,57.78,0.0 130 | 90.51,28.27,69.81,62.24,100.89,58.82,0.0 131 | 77.24,16.74,49.78,60.5,110.69,39.79,0.0 132 | 50.07,9.12,32.17,40.95,99.71,26.77,0.0 133 | 69.78,13.78,58.0,56.0,118.93,17.91,0.0 134 | 69.63,21.12,52.77,48.5,116.8,54.82,0.0 135 | 81.75,20.12,70.56,61.63,119.43,55.51,0.0 136 | 52.2,17.21,78.09,34.99,136.97,54.94,0.0 137 | 77.12,30.35,77.48,46.77,110.61,82.09,0.0 138 | 88.02,39.84,81.77,48.18,116.6,56.77,0.0 139 | 83.4,34.31,78.42,49.09,110.47,49.67,0.0 140 | 72.05,24.7,79.87,47.35,107.17,56.43,0.0 141 | 85.1,21.07,91.73,64.03,109.06,38.03,0.0 142 | 69.56,15.4,74.44,54.16,105.07,29.7,0.0 143 | 89.5,48.9,72.0,40.6,134.63,118.35,0.0 144 | 85.29,18.28,100.74,67.01,110.66,58.88,0.0 145 | 60.63,20.6,64.54,40.03,117.23,104.86,0.0 146 | 60.04,14.31,58.04,45.73,105.13,30.41,0.0 147 | 85.64,42.69,78.75,42.95,105.14,42.89,0.0 148 | 85.58,30.46,78.23,55.12,114.87,68.38,0.0 149 | 55.08,-3.76,56.0,58.84,109.92,31.77,0.0 150 | 65.76,9.83,50.82,55.92,104.39,39.31,0.0 151 | 79.25,23.94,40.8,55.3,98.62,36.71,0.0 152 | 81.11,20.69,60.69,60.42,94.02,40.51,0.0 153 | 48.03,3.97,58.34,44.06,125.35,35.0,0.0 154 | 63.4,14.12,48.14,49.29,111.92,31.78,0.0 155 | 57.29,15.15,64.0,42.14,116.74,30.34,0.0 156 | 41.19,5.79,42.87,35.39,103.35,27.66,0.0 157 | 66.8,14.55,72.08,52.25,82.46,41.69,0.0 158 | 79.48,26.73,70.65,52.74,118.59,61.7,0.0 159 | 44.22,1.51,46.11,42.71,108.63,42.81,0.0 160 | 57.04,0.35,49.2,56.69,103.05,52.17,0.0 161 | 64.27,12.51,68.7,51.77,95.25,39.41,0.0 162 | 92.03,35.39,77.42,56.63,115.72,58.06,0.0 163 | 67.26,7.19,51.7,60.07,97.8,42.14,0.0 164 | 118.14,38.45,50.84,79.7,81.02,74.04,0.0 165 | 115.92,37.52,76.8,78.41,104.7,81.2,0.0 166 | 53.94,9.31,43.1,44.64,124.4,25.08,0.0 167 | 83.7,20.27,77.11,63.43,125.48,69.28,0.0 168 | 56.99,6.87,57.01,50.12,109.98,36.81,0.0 169 | 72.34,16.42,59.87,55.92,70.08,12.07,0.0 170 | 95.38,24.82,95.16,70.56,89.31,57.66,0.0 171 | 44.25,1.1,38.0,43.15,98.27,23.91,0.0 172 | 64.81,15.17,58.84,49.64,111.68,21.41,0.0 173 | 
78.4,14.04,79.69,64.36,104.73,12.39,0.0 174 | 56.67,13.46,43.77,43.21,93.69,21.11,0.0 175 | 50.83,9.06,56.3,41.76,79.0,23.04,0.0 176 | 61.41,25.38,39.1,36.03,103.4,21.84,0.0 177 | 56.56,8.96,52.58,47.6,98.78,50.7,0.0 178 | 67.03,13.28,66.15,53.75,100.72,33.99,0.0 179 | 80.82,19.24,61.64,61.58,89.47,44.17,0.0 180 | 80.65,26.34,60.9,54.31,120.1,52.47,0.0 181 | 68.72,49.43,68.06,19.29,125.02,54.69,0.0 182 | 37.9,4.48,24.71,33.42,157.85,33.61,0.0 183 | 64.62,15.23,67.63,49.4,90.3,31.33,0.0 184 | 75.44,31.54,89.6,43.9,106.83,54.97,0.0 185 | 71.0,37.52,84.54,33.49,125.16,67.77,0.0 186 | 81.06,20.8,91.78,60.26,125.43,38.18,0.0 187 | 91.47,24.51,84.62,66.96,117.31,52.62,0.0 188 | 81.08,21.26,78.77,59.83,90.07,49.16,0.0 189 | 60.42,5.27,59.81,55.15,109.03,30.27,0.0 190 | 85.68,38.65,82.68,47.03,120.84,61.96,0.0 191 | 82.41,29.28,77.05,53.13,117.04,62.77,0.0 192 | 43.72,9.81,52.0,33.91,88.43,40.88,0.0 193 | 86.47,40.3,61.14,46.17,97.4,55.75,0.0 194 | 74.47,33.28,66.94,41.19,146.47,124.98,0.0 195 | 70.25,10.34,76.37,59.91,119.24,32.67,0.0 196 | 72.64,18.93,68.0,53.71,116.96,25.38,0.0 197 | 71.24,5.27,86.0,65.97,110.7,38.26,0.0 198 | 63.77,12.76,65.36,51.01,89.82,56.0,0.0 199 | 58.83,37.58,125.74,21.25,135.63,117.31,0.0 200 | 74.85,13.91,62.69,60.95,115.21,33.17,0.0 201 | 75.3,16.67,61.3,58.63,118.88,31.58,0.0 202 | 63.36,20.02,67.5,43.34,131.0,37.56,0.0 203 | 67.51,33.28,96.28,34.24,145.6,88.3,0.0 204 | 76.31,41.93,93.28,34.38,132.27,101.22,0.0 205 | 73.64,9.71,63.0,63.92,98.73,26.98,0.0 206 | 56.54,14.38,44.99,42.16,101.72,25.77,0.0 207 | 80.11,33.94,85.1,46.17,125.59,100.29,0.0 208 | 95.48,46.55,59.0,48.93,96.68,77.28,0.0 209 | 74.09,18.82,76.03,55.27,128.41,73.39,0.0 210 | 87.68,20.37,93.82,67.31,120.94,76.73,0.0 211 | 48.26,16.42,36.33,31.84,94.88,28.34,0.0 212 | 61.73,17.11,46.9,44.62,120.92,3.09,1.0 213 | 64.31,26.33,50.96,37.98,106.18,3.12,1.0 214 | 61.54,19.68,52.89,41.86,118.69,4.82,1.0 215 | 54.95,5.87,53.0,49.09,126.97,-0.63,1.0 216 | 48.8,18.02,52.0,30.78,139.15,10.44,1.0 217 | 40.41,-1.33,30.98,41.74,119.34,-6.17,1.0 218 | 53.94,20.72,29.22,33.22,114.37,-0.42,1.0 219 | 82.91,29.89,58.25,53.01,110.71,6.08,1.0 220 | 56.1,13.11,62.64,43.0,116.23,31.17,1.0 221 | 45.25,8.69,41.58,36.56,118.55,0.21,1.0 222 | 39.09,5.54,26.93,33.55,131.58,-0.76,1.0 223 | 49.0,13.11,51.87,35.88,126.4,0.54,1.0 224 | 67.54,14.66,58.0,52.88,123.63,25.97,1.0 225 | 54.75,9.75,48.0,45.0,123.04,8.24,1.0 226 | 54.6,21.49,29.36,33.11,118.34,-1.47,1.0 227 | 74.57,15.72,58.62,58.84,105.42,0.6,1.0 228 | 51.53,13.52,35.0,38.01,126.72,13.93,1.0 229 | 34.76,2.63,29.5,32.12,127.14,-0.46,1.0 230 | 38.13,6.56,50.45,31.57,132.11,6.34,1.0 231 | 43.12,13.82,40.35,29.3,128.52,0.97,1.0 232 | 54.5,6.82,47.0,47.68,111.79,-4.41,1.0 233 | 49.83,16.74,28.0,33.09,121.44,1.91,1.0 234 | 50.68,6.46,35.0,44.22,116.59,-0.21,1.0 235 | 74.98,14.92,53.73,60.05,105.65,1.59,1.0 236 | 48.17,9.59,39.71,38.58,135.62,5.36,1.0 237 | 63.62,16.93,49.35,46.68,117.09,-0.36,1.0 238 | 50.75,20.24,37.0,30.52,122.34,2.29,1.0 239 | 50.16,-2.97,42.0,53.13,131.8,-8.29,1.0 240 | 46.24,10.06,37.0,36.17,128.06,-5.1,1.0 241 | 69.0,13.29,55.57,55.71,126.61,10.83,1.0 242 | -------------------------------------------------------------------------------- /model_sinne/SiNNE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.neighbors import NearestNeighbors 3 | from model_sinne import utils 4 | from tqdm import tqdm 5 | import pandas as pd 6 | 7 | 8 | class SiNNE: 9 | def __init__(self, 
max_level="full", width=10, ensemble_num=100, sample_num=8, pretrain=False, verbose=False): 10 | self.max_level = max_level 11 | self.width = width 12 | self.t = ensemble_num 13 | self.phi = sample_num 14 | 15 | self.pretrain = pretrain 16 | self.pretrain_nn_model_map = None 17 | self.verbose = verbose 18 | 19 | return 20 | 21 | def fit(self, x, y): 22 | dim = x.shape[1] 23 | norm_idx = np.where(y == 0)[0] 24 | anom_idx = np.where(y == 1)[0] 25 | anom_x = x[anom_idx] 26 | norm_x = x[norm_idx] 27 | 28 | if self.pretrain: 29 | self.pretrain_nn_model_map = self.pretrain_nn_models_pwset(x, y) 30 | nn_model_map = self.pretrain_nn_model_map 31 | else: 32 | nn_model_map = {} 33 | 34 | if self.max_level == "full": 35 | self.max_level = dim 36 | else: 37 | self.max_level = int(self.max_level) 38 | 39 | # step 1: get the scores of each 1-d subspace 40 | dim1_scores_lst = np.zeros([dim, len(anom_idx)]) 41 | if self.verbose: 42 | print("Running level 1: ") 43 | for i in tqdm(range(dim)): 44 | if self.pretrain: 45 | [ensmb_samples_lst, ensmb_radius_lst, _] = nn_model_map[str([i])] 46 | else: 47 | norm_x_subspace = norm_x[:, [i]] 48 | [ensmb_samples_lst, ensmb_radius_lst, _] = self.training_nn_models(norm_x_subspace) 49 | 50 | subspaced_queries = anom_x[:, [i]] 51 | # dim1_scores_lst[i] = scoring(subspaced_queries, ensmb_radius_lst, ensmb_nn_lst) 52 | for jj, q in enumerate(subspaced_queries): 53 | dim1_scores_lst[i, jj] = self.single_scoring(q, ensmb_samples_lst, ensmb_radius_lst) 54 | 55 | D = np.arange(dim) 56 | exp_subspace_lst = [] 57 | for i in tqdm(range(len(anom_idx))): 58 | query = anom_x[i] 59 | 60 | init_score = dim1_scores_lst[:, i] 61 | init_subspaces = [[i] for i in range(dim)] 62 | 63 | if dim <= 50: # low-dimensional data: keep all 1-d subspaces as candidates 64 | keep_score = init_score 65 | keep_subspaces = init_subspaces 66 | else: # otherwise keep only the top-width scored subspaces 67 | start = len(init_score) - self.width 68 | keep_score = np.sort(init_score)[start:] 69 | indices = np.argsort(init_score)[start:] 70 | keep_subspaces = [init_subspaces[dd] for dd in indices] 71 | 72 | for level in range(2, self.max_level): 73 | if self.verbose: 74 | print("--------------------- level: [{}] ----------------------".format(level)) 75 | 76 | # grow each subspace kept from the previous level by one feature; already-explored duplicates are skipped 77 | root_subspaces = [s for s in keep_subspaces if len(s) == level - 1] 78 | exploring_subspaces = [] 79 | for s in root_subspaces: 80 | other_features = np.setdiff1d(D, s) 81 | for f in other_features: 82 | this_subspace = list(np.sort(s + [f])) 83 | if this_subspace not in exploring_subspaces: 84 | exploring_subspaces.append(this_subspace) 85 | # print("add to exploring set") 86 | if self.verbose: 87 | print("exploring subspaces size: ", len(exploring_subspaces)) 88 | exploring_scores = np.zeros(len(exploring_subspaces)) 89 | if self.verbose: 90 | iterator = tqdm(range(len(exploring_subspaces))) 91 | else: 92 | iterator = range(len(exploring_subspaces)) 93 | 94 | for jj in iterator: 95 | s = exploring_subspaces[jj] 96 | if self.pretrain or str(s) in nn_model_map: 97 | [ensmb_samples_lst, ensmb_radius_lst, ensmb_nn_lst] = nn_model_map[str(s)] 98 | else: 99 | norm_x_subspace = norm_x[:, s] 100 | nn_model = self.training_nn_models(norm_x_subspace) 101 | nn_model_map[str(s)] = nn_model 102 | [ensmb_samples_lst, ensmb_radius_lst, ensmb_nn_lst] = nn_model 103 | 104 | # subspaced_query = [query[s]] 105 | # query_subspace_score = scoring(subspaced_query, ensmb_radius_lst, ensmb_nn_lst) 106 | 107 | # @NOTE: use a small bias to get a larger score for a shorter subspace, 108 | # so that the model tends to use shorter
subspaces as the explanation when multiple subspaces have the same score 109 | query_subspace_score = self.single_scoring(query[s], ensmb_samples_lst, ensmb_radius_lst) + \ 110 | (dim - len(s)) * 0.001 111 | exploring_scores[jj] = query_subspace_score 112 | 113 | 114 | scores = np.append(keep_score, exploring_scores) 115 | subspaces = keep_subspaces + exploring_subspaces 116 | 117 | if self.width > len(scores): 118 | start = 0 119 | else: 120 | start = len(scores) - self.width 121 | keep_score = np.sort(scores)[start:] 122 | indices = np.argsort(scores)[start:] 123 | keep_subspaces = [subspaces[dd] for dd in indices] 124 | 125 | if self.verbose: 126 | print("--------------------- level: [{}] ----------------------".format(level)) 127 | print(keep_score) 128 | print(keep_subspaces) 129 | exp_subspace = keep_subspaces[-1] 130 | exp_subspace_lst.append(exp_subspace) 131 | return exp_subspace_lst 132 | 133 | def training_nn_models(self, data): 134 | n_x = data.shape[0] 135 | ensmb_samples_lst = [] 136 | ensmb_radius_lst = [] 137 | ensmb_nn_lst = [] 138 | for i in range(self.t): 139 | samples = data[np.random.choice(np.arange(n_x), self.phi, replace=False)] 140 | ensmb_samples_lst.append(samples) 141 | 142 | # the nearest neighbor of each sample is the sample itself, so n_neighbors is set to 2 143 | samples_nn = NearestNeighbors(n_neighbors=2).fit(samples) 144 | ensmb_nn_lst.append(samples_nn) 145 | 146 | radius = np.zeros(self.phi) 147 | for ii, xx in enumerate(samples): 148 | # nbr_idx = nbrs_local.kneighbors([xx])[1].flatten()[1] 149 | radius[ii] = samples_nn.kneighbors([xx])[0].flatten()[1] 150 | ensmb_radius_lst.append(radius) 151 | 152 | nn_model = [ensmb_samples_lst, ensmb_radius_lst, ensmb_nn_lst] 153 | return nn_model 154 | 155 | def single_scoring(self, single_x, ensmb_samples_lst, ensmb_radius_lst): 156 | outlier_score = 0 157 | 158 | for i in range(self.t): 159 | radius = ensmb_radius_lst[i] 160 | samples = ensmb_samples_lst[i] 161 | 162 | is_outlier = 1 163 | for j in range(self.phi): 164 | sample = samples[j] 165 | threshold = radius[j] 166 | dist = np.sqrt(np.sum((sample - single_x)**2)) 167 | if dist <= threshold: 168 | is_outlier = 0 169 | break 170 | outlier_score += is_outlier 171 | 172 | outlier_score = outlier_score / self.t 173 | return outlier_score 174 | 175 | # @TODO bug: it is wrong to consider only the nearest sample in each model 176 | def scoring(self, test_x, ensmb_radius_lst, ensmb_nn_lst): 177 | outlier_scores = np.zeros(len(test_x)) 178 | t = len(ensmb_radius_lst) 179 | num_x = len(test_x) 180 | 181 | for i in range(t): 182 | radius = ensmb_radius_lst[i] 183 | nn = ensmb_nn_lst[i] 184 | 185 | # choose the nearest sample in model i 186 | nbr_idx = nn.kneighbors(test_x)[1][:, 0] 187 | dists = nn.kneighbors(test_x)[0][:, 0] 188 | thresholds = radius[nbr_idx] 189 | 190 | for j in range(num_x): 191 | dist = dists[j] 192 | threshold = thresholds[j] 193 | if dist <= threshold: 194 | outlier_scores[j] += 0 195 | else: 196 | outlier_scores[j] += 1 197 | 198 | outlier_scores = outlier_scores / t 199 | return outlier_scores 200 | 201 | def pretrain_nn_models_pwset(self, x, y): 202 | dim = x.shape[1] 203 | norm_idx = np.where(y == 0)[0] 204 | x_norm = x[norm_idx] 205 | 206 | full_set = np.arange(dim) 207 | pwset = utils.powerset(full_set) 208 | pwset.remove([]) 209 | pwset_nn_model_map = {} 210 | 211 | for subspace in tqdm(pwset): 212 | norm_x_subspace = x_norm[:, subspace] 213 | nn_model = self.training_nn_models(norm_x_subspace) 214 | pwset_nn_model_map[str(subspace)] = nn_model 215 | 
return pwset_nn_model_map 216 | 217 | def fit_od(self, x): 218 | [ensmb_samples_lst, ensmb_radius_lst, _] = self.training_nn_models(x) 219 | score_lst = [] 220 | for i in tqdm(range(len(x))): 221 | xx = x[i] 222 | score = self.single_scoring(xx, ensmb_samples_lst, ensmb_radius_lst) 223 | score_lst.append(score) 224 | return score_lst 225 | 226 | 227 | # if __name__ == '__main__': 228 | # root = 'E:/1-anomaly detection/10-AnoExp/' 229 | # path = root + "data/tabular/new_pca/cardio_pca.csv" 230 | # df = pd.read_csv(path) 231 | # x = df.values[:, :-1] 232 | # y = np.array(df.values[:, -1], dtype=int) 233 | # 234 | # model = SiNNE(max_level="full", width=10, ensemble_num=100, sample_num=8, pretrain=False) 235 | # # exp_subspace_lst = model.fit(x,y) 236 | # # precision, jaccard, score = evaluation_od.evaluation_od(exp_subspace_lst, x, y, 237 | # # "thyroid", model_name="iforest") 238 | # # metric_lst = [precision, jaccard, score] 239 | # # print(metric_lst) 240 | # 241 | # # norm_idx = np.where(y == 0)[0] 242 | # # norm_x = x[norm_idx] 243 | # # [ensmb_samples_lst, ensmb_radius_lst, ensmb_nn_lst] = model.training_nn_models(norm_x) 244 | # # score_lst = [] 245 | # # for i in tqdm(range(len(x))): 246 | # # xx = x[i] 247 | # # score = model.single_scoring(xx, ensmb_samples_lst, ensmb_radius_lst) 248 | # # score_lst.append(score) 249 | # # from sklearn import metrics 250 | # # print(metrics.roc_auc_score(y, score_lst)) -------------------------------------------------------------------------------- /model_coin/COIN.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import math 4 | import time 5 | import numpy as np 6 | from sklearn.neighbors import NearestNeighbors 7 | from sklearn.cluster import KMeans 8 | from sklearn import svm 9 | from model_coin.prediction_strength import optimalK 10 | from tqdm import tqdm 11 | 12 | 13 | class COIN(object): 14 | def __init__(self, data, inds_otlr, nbrs_ratio, 15 | AUG=1.0, MIN_CLUSTER_SIZE=5, MAX_NUM_CLUSTER=4, VAL_TIMES=10, C_SVM=1., 16 | RESOLUTION=0.05, THRE_PS=0.85, DEFK=0): 17 | """ 18 | data: Data matrix, each row represents one instance 19 | inds_otlr: A vector with each entry telling whether this instance is outlier (1) or not (0) 20 | nbrs_ratio: The ratio of normal instances as the context for each outlier 21 | AUG: An additional feature attached to the input as data augmentation 22 | MIN_CLUSTER_SIZE: Minimum number of nodes in each cluster 23 | MAX_NUM_CLUSTER: Maximum number of clusters considered in prediction strength computation 24 | VAL_TIMES: Number of iterations for computing prediction strength 25 | C_SVM: A hyperparameter in SVM (optimum value would be better to be estimated through validation) 26 | DEFK: Predefined number of clusters in each context. Value 0 means using Prediction Strength to estimate it. 
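        Example (an illustrative usage sketch only; `X` is the data matrix, `y` is the 0/1 outlier indicator
        vector, and the values below are arbitrary placeholders rather than recommended settings):
            model = COIN(X, y, nbrs_ratio=0.1, DEFK=0)
            fea_weight_lst = model.fit(sgnf_prior=1)  # one attribute-importance vector per outlier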
27 | """ 28 | self.data = data 29 | self.dim = data.shape[1] 30 | 31 | self.inds_otlr = inds_otlr 32 | self.ano_idx = np.where(inds_otlr == 1)[0] 33 | 34 | self.AUG = float(AUG) 35 | 36 | self.num_inst = data.shape[0] 37 | self.num_feat = data.shape[1] 38 | self.num_nbrs = int(nbrs_ratio * self.num_inst) 39 | 40 | self.MIN_CLUSTER_SIZE = MIN_CLUSTER_SIZE 41 | self.MAX_NUM_CLUSTER = MAX_NUM_CLUSTER 42 | self.VAL_TIMES = VAL_TIMES 43 | self.C_SVM = C_SVM 44 | self.RESOLUTION = RESOLUTION 45 | self.THRE_PS = THRE_PS 46 | self.DEFK = DEFK 47 | 48 | # normal instances 49 | self.data_normal = self.data[np.where(self.inds_otlr == 0)[0]] 50 | 51 | # nearest nbrs object based on normal instances 52 | self.nbrs = NearestNeighbors(n_neighbors=self.num_nbrs, n_jobs=-1) 53 | self.nbrs.fit(self.data_normal) 54 | 55 | def interpret_outliers(self, ids_target, sgnf_vec, int_flag=0): 56 | """ 57 | ids_target: Indices of target outliers 58 | sgnf_vec: A vector indicating the importance of each attribute, as prior knowledge 59 | int_flag: Whether the attributes are discrete (synthetic samples are rounded to integers if so) 60 | :return: A 2D-array of attribute importance (one row per target outlier), and a dict mapping outlier ID to outlierness score 61 | """ 62 | 63 | # append a significance of 0 for the augmented feature 64 | if isinstance(sgnf_vec, int) or isinstance(sgnf_vec, float): 65 | sgnf_vec = np.hstack((np.ones(self.num_feat), 0)) 66 | else: 67 | sgnf_vec = np.hstack((sgnf_vec, [0])) 68 | 69 | # Interpret each target outlier 70 | oid_devt_dict = dict() # outlier_id -> outlierness score 71 | score_attr_mat = [] 72 | 73 | for ii in tqdm(range(len(ids_target))): 74 | i = ids_target[ii] 75 | 76 | # Do clustering on the context, build one classifier for each cluster 77 | nums_c, clfs, cluster_attr_scale = self.cluster_context(i, int_flag) 78 | 79 | # Calculate outlierness score 80 | devt_i = self.CalculateOutlierness(i, clfs, nums_c, sgnf_vec) 81 | oid_devt_dict[i] = devt_i 82 | 83 | # Find outlying attributes 84 | score_attr = np.zeros(self.num_feat) 85 | for num_c, clf in zip(nums_c, clfs): 86 | score_attr += num_c * np.abs(clf.coef_[0]) # weighted by the normal cluster size 87 | score_attr /= float(np.sum(nums_c)) 88 | score_attr /= np.sum(score_attr) # relative importance 89 | score_attr_mat.append(copy.copy(score_attr)) 90 | # print(score_attr) 91 | 92 | return np.array(score_attr_mat), oid_devt_dict 93 | 94 | def cluster_context(self, id_outlier, int_flag): 95 | # find the context of the outlier 96 | dist_btwn, otlr_nbrs = self.nbrs.kneighbors([self.data[id_outlier]]) 97 | dist_btwn, otlr_nbrs = dist_btwn[0], self.data_normal[otlr_nbrs[0], :] 98 | # print(self.data[id_outlier]) 99 | # print(otlr_nbrs) 100 | 101 | # choose the number of clusters in the context 102 | if self.DEFK == 0: 103 | k_best = optimalK(otlr_nbrs, self.VAL_TIMES, self.MAX_NUM_CLUSTER, self.THRE_PS) 104 | else: 105 | k_best = self.DEFK 106 | k_best = min(k_best+1, self.MAX_NUM_CLUSTER) # empirically, it is better to have a larger K 107 | # print('Best k:', k_best) 108 | 109 | # cluster the context 110 | kmeans = KMeans(n_clusters=k_best, random_state=0).fit(otlr_nbrs) 111 | label_nbrs = kmeans.labels_ 112 | 113 | clfs = [] 114 | nbrs_mean = [] 115 | nums_c = [] 116 | cluster_attr_scale = [] 117 | 118 | # build a linear classifier for each cluster of nbrs 119 | for c in range(k_best): 120 | # indices for instances in cluster c 121 | inds_c = np.where(label_nbrs == c)[0] 122 | 123 | # the cluster cannot be too small 124 | if np.size(inds_c) < self.MIN_CLUSTER_SIZE: 125 | continue 126 | nums_c.append(len(inds_c)) 127 | 128 | # instances for
cluster c 129 | otlr_nbrs_c = otlr_nbrs[inds_c, :] 130 | dist_btwn_c = dist_btwn[inds_c] 131 | 132 | # distance property of cluster c 133 | cluster_attr_scale.append(np.hstack((np.max(otlr_nbrs_c, axis=0) - np.min(otlr_nbrs_c, axis=0), 0))) # scale for each attr 134 | 135 | # synthetic sampling to build two classes 136 | insts_c0 = self.SyntheticSampling(otlr_nbrs_c, self.data[id_outlier], int_flag) 137 | insts_c1 = otlr_nbrs_c 138 | 139 | clf = self.SVCInterpreter(insts_c0, insts_c1) 140 | clfs.append(clf) 141 | nbrs_mean.append(np.average(insts_c1, axis=0)) 142 | 143 | return nums_c, clfs, cluster_attr_scale 144 | 145 | def SyntheticSampling(self, insts, otlr, int_flag): 146 | ''' 147 | Expand the outlier into a class. 148 | 149 | insts: normal instances 150 | otlr: the outlier instance 151 | int_flag: whether to round the synthetic samples to int 152 | :return: the outlier class, i.e., the outlier plus synthetic points sampled around it 153 | 154 | ''' 155 | 156 | num_c0_new = insts.shape[0] - 1 157 | coeff_c0_new = np.random.rand(num_c0_new, insts.shape[0]) # transformation matrix for synthetic sampling 158 | nbrs_local = NearestNeighbors(n_neighbors=1).fit(insts) 159 | min_dist_to_nbr = nbrs_local.kneighbors([otlr])[0][0, 0]/insts.shape[1] 160 | 161 | for r in range(coeff_c0_new.shape[0]): 162 | coeff_c0_new[r, :] /= sum(coeff_c0_new[r, :]) 163 | insts_c0_new = np.dot(coeff_c0_new, insts - np.dot(np.ones((insts.shape[0], 1)), [otlr])) 164 | for r in range(insts_c0_new.shape[0]): # shrink to prevent overlap 165 | insts_c0_new[r, :] *= (0.2 * np.random.rand(1)[0] * min_dist_to_nbr) 166 | insts_c0_new += np.dot(np.ones((num_c0_new, 1)), [otlr]) # origin + shift 167 | if int_flag: 168 | insts_c0_new = np.round(insts_c0_new) 169 | insts_c0 = np.vstack((otlr, insts_c0_new)) 170 | 171 | return insts_c0 172 | 173 | def SVCInterpreter(self, insts_c0, insts_c1): 174 | # classification between normal instances and outliers, where outliers have negative output 175 | 176 | clf = svm.LinearSVC(penalty='l1', C=self.C_SVM, dual=False, intercept_scaling=self.AUG) 177 | X_c = np.vstack((insts_c0, insts_c1)) 178 | y_c = np.hstack((np.zeros(insts_c0.shape[0]), np.ones(insts_c1.shape[0]))) 179 | clf.fit(X_c, y_c) 180 | #print(insts_c1) 181 | #print(insts_c0) 182 | 183 | return clf 184 | 185 | def CalculateOutlierness(self, id_outlier, clfs, nums_c, sgnf_vec): 186 | otlr = self.data[id_outlier] 187 | 188 | devt_overall = 0. 189 | for c in range(len(nums_c)): 190 | # distance to the boundary 191 | otlr_aug = np.hstack((otlr, self.AUG)) 192 | w = np.hstack((clfs[c].coef_[0], clfs[c].intercept_[0]/self.AUG)) 193 | w_a = np.hstack((clfs[c].coef_[0], 0)) 194 | dist = -min(0, np.inner(otlr_aug, w))/np.linalg.norm(w_a) 195 | 196 | # rescale deviation according to attributes' importance 197 | devt = np.linalg.norm(np.multiply(dist * w_a / np.linalg.norm(w_a), sgnf_vec)) 198 | if np.isnan(devt): 199 | devt = 0.
200 | 201 | # weighted by the opponent cluster size 202 | devt_overall += devt * nums_c[c] 203 | 204 | devt_overall /= sum(nums_c) 205 | 206 | return devt_overall 207 | 208 | def fit(self, sgnf_prior): 209 | importance_attr, outlierness = self.interpret_outliers(self.ano_idx, sgnf_prior) 210 | return importance_attr 211 | 212 | def weight2subspace(self, weight, r=0.7, num=-1): 213 | threshold = r * np.sum(weight) 214 | tmp_s = 0 215 | exp_subspace = [] 216 | sorted_idx1 = np.argsort(weight) 217 | sorted_idx = [sorted_idx1[self.dim - i -1] for i in range(self.dim)] 218 | if num != -1: 219 | exp_subspace = sorted_idx[:num] 220 | exp_subspace = list(np.sort(exp_subspace)) 221 | return exp_subspace 222 | 223 | for idx in sorted_idx: 224 | tmp_s += weight[idx] 225 | exp_subspace.append(idx) 226 | if tmp_s >= threshold: 227 | break 228 | exp_subspace = list(np.sort(exp_subspace)) 229 | return exp_subspace 230 | 231 | def weight2subspace_pn(self, weight): 232 | exp_subspace = [] 233 | for i in range(len(weight)): 234 | # exp_subspace.append(list(np.where(weight[i] > 0)[0])) 235 | if weight[i] > 0: 236 | exp_subspace.append(i) 237 | if len(exp_subspace) == 0: 238 | exp_subspace = np.arange(len(weight)) 239 | exp_subspace = list(np.sort(exp_subspace)) 240 | return exp_subspace 241 | 242 | def get_exp_subspace(self, fea_weight_lst, w2s_ratio, real_exp_len=None): 243 | exp_subspace_lst = [] 244 | for ii, idx in enumerate(self.ano_idx): 245 | fea_weight = fea_weight_lst[ii] 246 | if w2s_ratio == "real_len": 247 | exp_subspace_lst.append(self.weight2subspace(fea_weight, num=real_exp_len[ii])) 248 | elif w2s_ratio == "auto": 249 | r = math.sqrt(2 / self.dim) 250 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=r)) 251 | elif w2s_ratio == "pn": 252 | exp_subspace_lst.append(self.weight2subspace_pn(fea_weight)) 253 | else: 254 | exp_subspace_lst.append(self.weight2subspace(fea_weight, r=w2s_ratio)) 255 | return exp_subspace_lst -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------